From 712f10ced005069e0fac421379087c21abd37b73 Mon Sep 17 00:00:00 2001
From: cyclinder
Date: Fri, 5 Jan 2024 13:52:16 +0800
Subject: [PATCH] Spidercoordinator: It is able to get CIDR from kubeadm-config

If kube-controller-manager is running as a systemd process rather than as a Pod, we can't get the CIDR from the KCM Pod. In this case, we can get the CIDR from the kubeadm-config ConfigMap.

Signed-off-by: cyclinder
---
 .../overlay/get-started-calico-zh_cn.md   |  27 ++-
 .../install/overlay/get-started-calico.md |  27 ++-
 .../overlay/get-started-cilium-zh_cn.md   |  27 ++-
 .../install/overlay/get-started-cilium.md |  27 ++-
 .../coordinator_informer.go               |  91 ++++++----
 test/doc/spidercoodinator.md              |   1 +
 .../spidercoordinator_test.go             | 165 ++++++++++++++++--
 7 files changed, 310 insertions(+), 55 deletions(-)

diff --git a/docs/usage/install/overlay/get-started-calico-zh_cn.md b/docs/usage/install/overlay/get-started-calico-zh_cn.md
index 387d5ab20a..402f95115a 100644
--- a/docs/usage/install/overlay/get-started-calico-zh_cn.md
+++ b/docs/usage/install/overlay/get-started-calico-zh_cn.md
@@ -88,10 +88,31 @@ status:
   serviceCIDR:
   - 10.233.0.0/18
 ```

> 目前 Spiderpool 优先通过查询 `kube-system/kubeadm-config` ConfigMap 获取集群的 Pod 和 Service 子网。如果 kubeadm-config 不存在导致无法获取集群子网,那么 Spiderpool 会从 kube-controller-manager Pod 中获取集群 Pod 和 Service 的子网。如果您集群的 kube-controller-manager 组件以 `systemd` 方式而不是以静态 Pod 运行,那么 Spiderpool 仍然无法获取集群的子网信息。

-> 1. 如果 phase 不为 Synced, 那么将会阻止 Pod 被创建
->
-> 2. 如果 overlayPodCIDR 不正常, 可能会导致通信问题

如果上面两种方式都失败,Spiderpool 会将 status.phase 同步为 NotReady,这将会阻止 Pod 被创建。我们可以通过下面的方式解决异常情况:

- 手动创建 kubeadm-config ConfigMap, 并正确配置集群的子网信息:

```shell
export POD_SUBNET=
export SERVICE_SUBNET=
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeadm-config
  namespace: kube-system
data:
  ClusterConfiguration: |
    networking:
      podSubnet: ${POD_SUBNET}
      serviceSubnet: ${SERVICE_SUBNET}
EOF
```

一旦创建完成,Spiderpool 将会自动同步其状态。

### 创建 SpiderIPPool

diff --git a/docs/usage/install/overlay/get-started-calico.md b/docs/usage/install/overlay/get-started-calico.md
index f16932c2ad..0d87e48c47 100644
--- a/docs/usage/install/overlay/get-started-calico.md
+++ b/docs/usage/install/overlay/get-started-calico.md
@@ -84,9 +84,30 @@ status:
   - 10.233.0.0/18
 ```

-> 1. If the phase is not synced, the pod will be prevented from being created.
->
-> 2. If the overlayPodCIDR does not meet expectations, it may cause pod communication issue.

> At present, Spiderpool prioritizes obtaining the cluster's Pod and Service subnets by querying the kube-system/kubeadm-config ConfigMap. If the kubeadm-config ConfigMap does not exist and the cluster subnet therefore cannot be obtained, Spiderpool will attempt to retrieve the cluster Pod and Service subnets from the kube-controller-manager Pod. If the kube-controller-manager component in your cluster runs as a systemd service instead of as a static Pod, Spiderpool will still be unable to retrieve the cluster's subnet information.

If both of the above methods fail, Spiderpool will set status.phase to NotReady, preventing Pod creation.
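Before applying the workaround below, you can check which source is available in your cluster. These commands are illustrative; the namespace and label follow common kubeadm defaults and may differ in your environment:

```shell
# Does the kubeadm-config ConfigMap exist and does it contain the subnets?
kubectl -n kube-system get configmap kubeadm-config -o yaml | grep -E 'podSubnet|serviceSubnet'

# Is kube-controller-manager running as a static Pod that Spiderpool can query?
kubectl -n kube-system get pod -l component=kube-controller-manager
```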
To address such an abnormal situation, we can take the following approach:

- Manually create the kubeadm-config ConfigMap and correctly configure the cluster's subnet information:

```shell
export POD_SUBNET=
export SERVICE_SUBNET=
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeadm-config
  namespace: kube-system
data:
  ClusterConfiguration: |
    networking:
      podSubnet: ${POD_SUBNET}
      serviceSubnet: ${SERVICE_SUBNET}
EOF
```

Once created, Spiderpool will automatically synchronize its status.

### Create SpiderIPPool

diff --git a/docs/usage/install/overlay/get-started-cilium-zh_cn.md b/docs/usage/install/overlay/get-started-cilium-zh_cn.md
index a46d579d2e..90f9d1a7f4 100644
--- a/docs/usage/install/overlay/get-started-cilium-zh_cn.md
+++ b/docs/usage/install/overlay/get-started-cilium-zh_cn.md
@@ -85,9 +85,30 @@ status:
   - 10.233.0.0/18
 ```

-> 1. 如果 phase 不为 Synced, 那么将会阻止 Pod 被创建
->
-> 2. 如果 overlayPodCIDR 不正常, 可能会导致通信问题

> 目前 Spiderpool 优先通过查询 `kube-system/kubeadm-config` ConfigMap 获取集群的 Pod 和 Service 子网。如果 kubeadm-config 不存在导致无法获取集群子网,那么 Spiderpool 会从 kube-controller-manager Pod 中获取集群 Pod 和 Service 的子网。如果您集群的 kube-controller-manager 组件以 `systemd` 方式而不是以静态 Pod 运行,那么 Spiderpool 仍然无法获取集群的子网信息。

如果上面两种方式都失败,Spiderpool 会将 status.phase 同步为 NotReady,这将会阻止 Pod 被创建。我们可以通过下面的方式解决异常情况:

- 手动创建 kubeadm-config ConfigMap, 并正确配置集群的子网信息:

```shell
export POD_SUBNET=
export SERVICE_SUBNET=
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeadm-config
  namespace: kube-system
data:
  ClusterConfiguration: |
    networking:
      podSubnet: ${POD_SUBNET}
      serviceSubnet: ${SERVICE_SUBNET}
EOF
```

一旦创建完成,Spiderpool 将会自动同步其状态。

### 创建 SpiderIPPool

diff --git a/docs/usage/install/overlay/get-started-cilium.md b/docs/usage/install/overlay/get-started-cilium.md
index 97e4c4507d..5d645a359b 100644
--- a/docs/usage/install/overlay/get-started-cilium.md
+++ b/docs/usage/install/overlay/get-started-cilium.md
@@ -85,9 +85,30 @@ status:
   - 10.233.0.0/18
 ```

-> 1. If the phase is not synced, the pod will be prevented from being created.
->
-> 2. If the overlayPodCIDR does not meet expectations, it may cause pod communication issue.

> At present, Spiderpool prioritizes obtaining the cluster's Pod and Service subnets by querying the kube-system/kubeadm-config ConfigMap. If the kubeadm-config ConfigMap does not exist and the cluster subnet therefore cannot be obtained, Spiderpool will attempt to retrieve the cluster Pod and Service subnets from the kube-controller-manager Pod. If the kube-controller-manager component in your cluster runs as a systemd service instead of as a static Pod, Spiderpool will still be unable to retrieve the cluster's subnet information.

If both of the above methods fail, Spiderpool will set status.phase to NotReady, preventing Pod creation. To address such an abnormal situation, we can take the following approach:

- Manually create the kubeadm-config ConfigMap and correctly configure the cluster's subnet information:

```shell
export POD_SUBNET=
export SERVICE_SUBNET=
cat << EOF | kubectl apply -f -
apiVersion: v1
kind: ConfigMap
metadata:
  name: kubeadm-config
  namespace: kube-system
data:
  ClusterConfiguration: |
    networking:
      podSubnet: ${POD_SUBNET}
      serviceSubnet: ${SERVICE_SUBNET}
EOF
```

Once created, Spiderpool will automatically synchronize its status.
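As a quick sanity check (assuming the default SpiderCoordinator resource is named `default`, which is what the Spiderpool chart creates), you can watch the status converge:

```shell
# Wait for the phase to become Synced, then inspect the derived CIDRs
kubectl get spidercoordinator default -o jsonpath='{.status.phase}{"\n"}'
kubectl get spidercoordinator default -o jsonpath='{.status.overlayPodCIDR}{"\n"}{.status.serviceCIDR}{"\n"}'
```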
### Create SpiderIPPool

diff --git a/pkg/coordinatormanager/coordinator_informer.go b/pkg/coordinatormanager/coordinator_informer.go
index 6b61259e6c..980c107723 100644
--- a/pkg/coordinatormanager/coordinator_informer.go
+++ b/pkg/coordinatormanager/coordinator_informer.go
@@ -334,36 +334,8 @@ func (cc *CoordinatorController) syncHandler(ctx context.Context, coordinatorNam
 }
 
 func (cc *CoordinatorController) fetchPodAndServerCIDR(ctx context.Context, logger *zap.Logger, coordCopy *spiderpoolv2beta1.SpiderCoordinator) (*spiderpoolv2beta1.SpiderCoordinator, error) {
-	var err error
-	var cmPodList corev1.PodList
-	if err := cc.APIReader.List(ctx, &cmPodList, client.MatchingLabels{"component": "kube-controller-manager"}); err != nil {
-		event.EventRecorder.Eventf(
-			coordCopy,
-			corev1.EventTypeWarning,
-			"ClusterNotReady",
-			err.Error(),
-		)
-
-		setStatus2NoReady(logger, coordCopy)
-		return coordCopy, err
-	}
-	if len(cmPodList.Items) == 0 {
-		msg := `Failed to get kube-controller-manager Pod with label "component: kube-controller-manager"`
-		event.EventRecorder.Eventf(
-			coordCopy,
-			corev1.EventTypeWarning,
-			"ClusterNotReady",
-			msg,
-		)
-
-		setStatus2NoReady(logger, coordCopy)
-		return coordCopy, err
-	}
-
-	k8sPodCIDR, k8sServiceCIDR := extractK8sCIDR(&cmPodList.Items[0])
 	if *coordCopy.Spec.PodCIDRType == auto {
-		var podCidrType string
-		podCidrType, err = fetchType(cc.DefaultCniConfDir)
+		podCidrType, err := fetchType(cc.DefaultCniConfDir)
 		if err != nil {
 			if apierrors.IsNotFound(err) {
 				event.EventRecorder.Eventf(
@@ -381,6 +353,30 @@ func (cc *CoordinatorController) fetchPodAndServerCIDR(ctx context.Context, logg
 		coordCopy.Spec.PodCIDRType = &podCidrType
 	}
 
+	var err error
+	cm := &corev1.ConfigMap{}
+	var k8sPodCIDR, k8sServiceCIDR []string
+	if err := cc.APIReader.Get(ctx, types.NamespacedName{Namespace: metav1.NamespaceSystem, Name: "kubeadm-config"}, cm); err == nil {
+		logger.Sugar().Infof("Trying to fetch the ClusterCIDR from kube-system/kubeadm-config")
+		k8sPodCIDR, k8sServiceCIDR = ExtractK8sCIDRFromKubeadmConfigMap(cm)
+	} else {
+		logger.Sugar().Warn("kube-system/kubeadm-config is not found, trying to fetch the ClusterCIDR from kube-controller-manager Pod")
+		var cmPodList corev1.PodList
+		err = cc.APIReader.List(ctx, &cmPodList, client.MatchingLabels{"component": "kube-controller-manager"})
+		if err != nil {
+			logger.Sugar().Errorf("failed to get kube-controller-manager Pod with label \"component: kube-controller-manager\": %v", err)
+			event.EventRecorder.Eventf(
+				coordCopy,
+				corev1.EventTypeWarning,
+				"ClusterNotReady",
+				"Neither kubeadm-config ConfigMap nor kube-controller-manager Pod can be found",
+			)
+			setStatus2NoReady(logger, coordCopy)
+			return coordCopy, err
+		}
+		k8sPodCIDR, k8sServiceCIDR = ExtractK8sCIDRFromKCMPod(&cmPodList.Items[0])
+	}
+
 	switch *coordCopy.Spec.PodCIDRType {
 	case cluster:
 		if cc.caliCtrlCanncel != nil {
@@ -538,7 +534,42 @@ func (cc *CoordinatorController) fetchCiliumCIDR(ctx context.Context, logger *za
 	return nil
 }
 
-func extractK8sCIDR(kcm *corev1.Pod) ([]string, []string) {
+func ExtractK8sCIDRFromKubeadmConfigMap(cm *corev1.ConfigMap) ([]string, []string) {
+	var podCIDR, serviceCIDR []string
+
+	podReg := regexp.MustCompile(`podSubnet: (.*)`)
+	serviceReg := regexp.MustCompile(`serviceSubnet: (.*)`)
+
+	var podSubnets, serviceSubnets []string
+	for _, data := range cm.Data {
+		// Only overwrite on a match, so a data key without subnet info doesn't clear an earlier match.
+		if subnets := podReg.FindStringSubmatch(data); len(subnets) != 0 {
+			podSubnets = subnets
+		}
+		if subnets := serviceReg.FindStringSubmatch(data); len(subnets) != 0 {
+			serviceSubnets = subnets
+		}
+	}
+
+	if len(podSubnets) != 0 {
+		for _, cidr :=
range strings.Split(podSubnets[1], ",") { + _, _, err := net.ParseCIDR(cidr) + if err != nil { + continue + } + podCIDR = append(podCIDR, cidr) + } + } + + if len(serviceSubnets) != 0 { + for _, cidr := range strings.Split(serviceSubnets[1], ",") { + _, _, err := net.ParseCIDR(cidr) + if err != nil { + continue + } + serviceCIDR = append(serviceCIDR, cidr) + } + } + + return podCIDR, serviceCIDR +} + +func ExtractK8sCIDRFromKCMPod(kcm *corev1.Pod) ([]string, []string) { var podCIDR, serviceCIDR []string podReg := regexp.MustCompile(`--cluster-cidr=(.*)`) diff --git a/test/doc/spidercoodinator.md b/test/doc/spidercoodinator.md index 4a1f0ba7de..dde1179b71 100644 --- a/test/doc/spidercoodinator.md +++ b/test/doc/spidercoodinator.md @@ -10,3 +10,4 @@ | V00006 | status.phase is not-ready, expect the cidr of status to be empty | p3 | | done | | | V00007 | spidercoordinator has the lowest priority | p3 | | done | | | V00008 | status.phase is not-ready, pods will fail to run | p3 | | done | | +| V00009 | it can get the clusterCIDR from kubeadmConfig or kube-controller-manager pod | p3 | | done| diff --git a/test/e2e/spidercoordinator/spidercoordinator_test.go b/test/e2e/spidercoordinator/spidercoordinator_test.go index 1054578fb6..22f2c2cd3f 100644 --- a/test/e2e/spidercoordinator/spidercoordinator_test.go +++ b/test/e2e/spidercoordinator/spidercoordinator_test.go @@ -5,14 +5,18 @@ package spidercoordinator_suite_test import ( "context" "fmt" + "reflect" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" "github.com/spidernet-io/spiderpool/pkg/constant" "github.com/spidernet-io/spiderpool/pkg/coordinatormanager" "github.com/spidernet-io/spiderpool/pkg/ip" + spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" "github.com/spidernet-io/spiderpool/test/e2e/common" + corev1 "k8s.io/api/core/v1" "k8s.io/utils/pointer" + "sigs.k8s.io/controller-runtime/pkg/client" ) var _ = Describe("SpiderCoordinator", Label("spidercoordinator", "overlay"), Serial, func() { @@ -94,21 +98,25 @@ var _ = Describe("SpiderCoordinator", Label("spidercoordinator", "overlay"), Ser } Eventually(func() bool { - By("Get the default spidercoodinator.") spc, err := GetSpiderCoordinator(common.SpidercoodinatorDefaultName) Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + GinkgoWriter.Printf("Display the default spider coordinator information: %+v \n", spc) - By("After restoring the cni configuration under /etc/cni/net.d, the environment returns to normal.") if spc.Status.OverlayPodCIDR == nil || spc.Status.Phase != coordinatormanager.Synced { GinkgoWriter.Printf("status.overlayPodCIDR status is still synchronizing, status %+v \n", spc.Status.OverlayPodCIDR) return false } + for _, cidr := range spc.Status.OverlayPodCIDR { if ip.IsIPv4CIDR(cidr) { - Expect(cidr).To(Equal(v4PodCIDRString)) + if cidr != v4PodCIDRString { + return false + } GinkgoWriter.Printf("ipv4 podCIDR is as expected, value %v=%v \n", cidr, v4PodCIDRString) } else { - Expect(cidr).To(Equal(v6PodCIDRString)) + if cidr != v6PodCIDRString { + return false + } GinkgoWriter.Printf("ipv6 podCIDR is as expected, value %v=%v \n", cidr, v6PodCIDRString) } } @@ -181,17 +189,23 @@ var _ = Describe("SpiderCoordinator", Label("spidercoordinator", "overlay"), Ser Eventually(func() bool { spc, err := GetSpiderCoordinator(common.SpidercoodinatorDefaultName) Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + GinkgoWriter.Printf("Display the default 
spider coordinator information: %+v \n", spc) if spc.Status.OverlayPodCIDR == nil || spc.Status.Phase != coordinatormanager.Synced { GinkgoWriter.Printf("status.overlayPodCIDR status is still synchronizing, status %+v \n", spc.Status.OverlayPodCIDR) return false } + for _, cidr := range spc.Status.OverlayPodCIDR { if ip.IsIPv4CIDR(cidr) { - Expect(cidr).To(Equal(v4PodCIDRString)) + if cidr != v4PodCIDRString { + return false + } GinkgoWriter.Printf("ipv4 podCIDR is as expected, value %v=%v \n", cidr, v4PodCIDRString) } else { - Expect(cidr).To(Equal(v6PodCIDRString)) + if cidr != v6PodCIDRString { + return false + } GinkgoWriter.Printf("ipv6 podCIDR is as expected, value %v=%v \n", cidr, v6PodCIDRString) } } @@ -264,21 +278,23 @@ var _ = Describe("SpiderCoordinator", Label("spidercoordinator", "overlay"), Ser Eventually(func() bool { spc, err := GetSpiderCoordinator(common.SpidercoodinatorDefaultName) Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + GinkgoWriter.Printf("Display the default spider coordinator information: %+v \n", spc) - if spc.Status.Phase != coordinatormanager.Synced { - GinkgoWriter.Printf("status.Phase status is still synchronizing, status %+v \n", spc.Status.Phase) - return false - } - if spc.Status.OverlayPodCIDR == nil { + if spc.Status.OverlayPodCIDR == nil || spc.Status.Phase != coordinatormanager.Synced { GinkgoWriter.Printf("status.overlayPodCIDR status is still synchronizing, status %+v \n", spc.Status.OverlayPodCIDR) return false } + for _, cidr := range spc.Status.OverlayPodCIDR { if ip.IsIPv4CIDR(cidr) { - Expect(cidr).To(Equal(v4PodCIDRString)) + if cidr != v4PodCIDRString { + return false + } GinkgoWriter.Printf("ipv4 podCIDR is as expected, value %v=%v \n", cidr, v4PodCIDRString) } else { - Expect(cidr).To(Equal(v6PodCIDRString)) + if cidr != v6PodCIDRString { + return false + } GinkgoWriter.Printf("ipv6 podCIDR is as expected, value %v=%v \n", cidr, v6PodCIDRString) } } @@ -316,4 +332,127 @@ var _ = Describe("SpiderCoordinator", Label("spidercoordinator", "overlay"), Ser }, common.ExecCommandTimeout, common.ForcedWaitingTime).Should(BeTrue()) }) }) + + Context("It can get the clusterCIDR from kubeadmConfig and kube-controller-manager pod", func() { + + var spc *spiderpoolv2beta1.SpiderCoordinator + var cm *corev1.ConfigMap + var err error + BeforeEach(func() { + if !common.CheckRunOverlayCNI() { + GinkgoWriter.Println("This environment is in underlay mode.") + Skip("Not applicable to underlay mode") + } + + if !common.CheckCalicoFeatureOn() { + GinkgoWriter.Println("The CNI isn't calico.") + Skip("This case only run in calico") + } + + cm, err = frame.GetConfigmap("kubeadm-config", "kube-system") + Expect(err).NotTo(HaveOccurred()) + + spc, err = GetSpiderCoordinator(common.SpidercoodinatorDefaultName) + Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + + // Switch podCIDRType to `cluster`. + spcCopy := spc.DeepCopy() + spcCopy.Spec.PodCIDRType = pointer.String(common.PodCIDRTypeCluster) + Expect(PatchSpiderCoordinator(spcCopy, spc)).NotTo(HaveOccurred()) + + DeferCleanup(func() { + spc, err := GetSpiderCoordinator(common.SpidercoodinatorDefaultName) + Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + GinkgoWriter.Printf("Display the default spider coordinator information: %+v \n", spc) + + // Switch podCIDRType to `auto`. 
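+				// Restoring `auto` lets the informer re-derive the overlay Pod CIDR from the CNI
+				// config on disk; the Eventually block below waits until status.phase is Synced
+				// again and the reported CIDRs match the expected cluster values.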
+ spcCopy := spc.DeepCopy() + spcCopy.Spec.PodCIDRType = pointer.String(common.PodCIDRTypeAuto) + Expect(PatchSpiderCoordinator(spcCopy, spc)).NotTo(HaveOccurred()) + + Eventually(func() bool { + spc, err := GetSpiderCoordinator(common.SpidercoodinatorDefaultName) + Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + GinkgoWriter.Printf("Display the default spider coordinator information: %+v \n", spc) + + if spc.Status.OverlayPodCIDR == nil || spc.Status.Phase != coordinatormanager.Synced { + GinkgoWriter.Printf("status.overlayPodCIDR status is still synchronizing, status %+v \n", spc.Status.OverlayPodCIDR) + return false + } + + for _, cidr := range spc.Status.OverlayPodCIDR { + if ip.IsIPv4CIDR(cidr) { + if cidr != v4PodCIDRString { + return false + } + GinkgoWriter.Printf("ipv4 podCIDR is as expected, value %v=%v \n", cidr, v4PodCIDRString) + } else { + if cidr != v6PodCIDRString { + return false + } + GinkgoWriter.Printf("ipv6 podCIDR is as expected, value %v=%v \n", cidr, v6PodCIDRString) + } + } + return true + }, common.ExecCommandTimeout, common.ForcedWaitingTime).Should(BeTrue()) + }) + }) + + It("Prioritize getting ClusterCIDR from kubeadm-config", func() { + GinkgoWriter.Printf("podCIDR and serviceCIDR from spidercoordinator: %v,%v\n", spc.Status.OverlayPodCIDR, spc.Status.ServiceCIDR) + + podCIDR, serviceCIDr := coordinatormanager.ExtractK8sCIDRFromKubeadmConfigMap(cm) + GinkgoWriter.Printf("podCIDR and serviceCIDR from kubeadm-config : %v,%v\n", podCIDR, serviceCIDr) + + Eventually(func() bool { + spc, err = GetSpiderCoordinator(common.SpidercoodinatorDefaultName) + Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + + if spc.Status.Phase != coordinatormanager.Synced { + return false + } + + if reflect.DeepEqual(podCIDR, spc.Status.OverlayPodCIDR) && reflect.DeepEqual(serviceCIDr, spc.Status.ServiceCIDR) { + return true + } + + return false + }, common.ExecCommandTimeout, common.ForcedWaitingTime).Should(BeTrue()) + }) + + It("Getting clusterCIDR from kube-controller-manager Pod when kubeadm-config does not exist", func() { + // delete the kubeadm-config configMap + GinkgoWriter.Print("deleting kubeadm-config\n") + err = frame.DeleteConfigmap("kubeadm-config", "kube-system") + Expect(err).NotTo(HaveOccurred()) + + defer func() { + cm.ResourceVersion = "" + cm.Generation = 0 + err = frame.CreateConfigmap(cm) + Expect(err).NotTo(HaveOccurred()) + }() + + allPods, err := frame.GetPodList(client.MatchingLabels{"component": "kube-controller-manager"}) + Expect(err).NotTo(HaveOccurred()) + + kcmPodCIDR, kcmServiceCIDR := coordinatormanager.ExtractK8sCIDRFromKCMPod(&allPods.Items[0]) + GinkgoWriter.Printf("podCIDR and serviceCIDR from kube-controller-manager pod : %v,%v\n", kcmPodCIDR, kcmServiceCIDR) + + Eventually(func() bool { + spc, err = GetSpiderCoordinator(common.SpidercoodinatorDefaultName) + Expect(err).NotTo(HaveOccurred(), "failed to get SpiderCoordinator, error is %v", err) + + if spc.Status.Phase != coordinatormanager.Synced { + return false + } + + if reflect.DeepEqual(kcmPodCIDR, spc.Status.OverlayPodCIDR) && reflect.DeepEqual(kcmServiceCIDR, spc.Status.ServiceCIDR) { + return true + } + + return false + }, common.ExecCommandTimeout, common.ForcedWaitingTime).Should(BeTrue()) + }) + }) })