diff --git a/.github/workflows/auto-upgrade-ci.yaml b/.github/workflows/auto-upgrade-ci.yaml index 6892931ff5..2227b93113 100644 --- a/.github/workflows/auto-upgrade-ci.yaml +++ b/.github/workflows/auto-upgrade-ci.yaml @@ -333,10 +333,17 @@ jobs: id: upgrade continue-on-error: true run: | + RESULT=0 make upgrade_e2e_spiderpool -e E2E_CLUSTER_NAME=${{ env.E2E_CLUSTER_NAME }} \ -e E2E_SPIDERPOOL_TAG=${{ needs.call_build_new_ci_image.outputs.imageTag }} \ -e SPIDERPOOL_AGENT_IMAGE_NAME=spiderpool-agent-race \ - -e SPIDERPOOL_CONTROLLER_IMAGE_NAME=spiderpool-controller-race + -e SPIDERPOOL_CONTROLLER_IMAGE_NAME=spiderpool-controller-race || RESULT=1 + if ((RESULT==0)) ; then + echo "succeeded to upgrade spiderpool from ${{ needs.get_ref.outputs.old_version }} to ${{ needs.get_ref.outputs.new_version }}" + else + echo "failed to upgrade spiderpool from ${{ needs.get_ref.outputs.old_version }} to ${{ needs.get_ref.outputs.new_version }}" + exit 1 + fi - name: Run e2e Test on ${{ needs.get_ref.outputs.new_version }} id: run_e2e diff --git a/.github/workflows/e2e-init.yaml b/.github/workflows/e2e-init.yaml index 888e1953de..aac5aab222 100644 --- a/.github/workflows/e2e-init.yaml +++ b/.github/workflows/e2e-init.yaml @@ -98,12 +98,6 @@ jobs: with: go-version: 1.21.4 - # https://github.com/helm/kind-action - - name: Install Kind Bin - uses: helm/kind-action@v1.8.0 - with: - install_only: true - - name: Install Tools run: | # install kind/p2ctl/helm/ginkgo @@ -204,22 +198,10 @@ jobs: path: e2ereport.json retention-days: 1 - - name: helm uninstalls spiderpool and cleans spiderpool CRD - id: clean - if: ${{ inputs.run_e2e == 'true' }} - run: | - RESULT=0 - make clean_e2e_spiderpool || RESULT=1 - if ((RESULT==0)) ; then - echo "CLEAN_E2E_PASS=true" >> $GITHUB_ENV - else - echo "CLEAN_E2E_PASS=false" >> $GITHUB_ENV - fi - - name: Show e2e Result if: ${{ inputs.run_e2e == 'true' }} run: | - if ${{ env.RUN_E2E_PASS == 'true' && env.CLEAN_E2E_PASS == 'true' }} ;then + if ${{ 
env.RUN_E2E_PASS == 'true' }} ;then exit 0 else exit 1 @@ -248,3 +230,16 @@ jobs: label: performance message: ${{ env.PERFORMANCE_RESULT }} color: lightgrey + + - name: helm uninstalls spiderpool and cleans spiderpool CRD + id: clean + if: ${{ inputs.run_e2e == 'true' }} + run: | + RESULT=0 + make clean_e2e_spiderpool || RESULT=1 + if ((RESULT==0)) ; then + echo "succeeded to uninstall spiderpool" + else + echo "failed to uninstall spiderpool" + fi + diff --git a/Makefile b/Makefile index d22ab56bd7..745d1e5b0c 100644 --- a/Makefile +++ b/Makefile @@ -426,11 +426,11 @@ clean: clean_e2e .PHONY: clean_e2e_spiderpool clean_e2e_spiderpool: - -$(QUIET) make -C test uninstall_spiderpool + $(QUIET) make -C test uninstall_spiderpool .PHONY: upgrade_e2e_spiderpool upgrade_e2e_spiderpool: - -$(QUIET) make -C test upgrade_spiderpool + $(QUIET) make -C test upgrade_spiderpool .PHONY: codegen codegen: diff --git a/charts/spiderpool/templates/deleteHook.yaml b/charts/spiderpool/templates/deleteHook.yaml new file mode 100644 index 0000000000..ef6a0b924e --- /dev/null +++ b/charts/spiderpool/templates/deleteHook.yaml @@ -0,0 +1,39 @@ +{{- if .Values.spiderpoolController.cleanup.enable }} +apiVersion: batch/v1 +kind: Job +metadata: + name: {{ .Values.spiderpoolController.name | trunc 48 | trimSuffix "-" }}-hook-pre-delete + annotations: + "helm.sh/hook": pre-delete + "helm.sh/hook-delete-policy": before-hook-creation,hook-succeeded +spec: + template: + spec: + hostNetwork: true + serviceAccountName: {{ .Values.spiderpoolController.name | trunc 63 | trimSuffix "-" }} + containers: + - name: pre-delete + image: {{ include "spiderpool.spiderpoolController.image" . 
| quote }} + command: + - {{ .Values.spiderpoolController.binName }} + - clean + - --validate + - {{ .Values.spiderpoolController.name | trunc 63 | trimSuffix "-" }} + - --mutating + - {{ .Values.spiderpoolController.name | trunc 63 | trimSuffix "-" }} + env: + - name: SPIDERPOOL_POD_NAMESPACE + value: {{ .Release.Namespace | quote }} + - name: SPIDERPOOL_INIT_NAME + value: {{ .Values.spiderpoolInit.name | trunc 63 | trimSuffix "-" | quote }} + - name: SPIDERPOOL_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: SPIDERPOOL_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + restartPolicy: Never + backoffLimit: 2 +{{- end }} diff --git a/charts/spiderpool/templates/role.yaml b/charts/spiderpool/templates/role.yaml index 3aea975e0a..d73364abc9 100644 --- a/charts/spiderpool/templates/role.yaml +++ b/charts/spiderpool/templates/role.yaml @@ -59,6 +59,7 @@ rules: - cronjobs - jobs verbs: + - delete - get - list - update diff --git a/cmd/spiderpool-controller/cmd/clean.go b/cmd/spiderpool-controller/cmd/clean.go new file mode 100644 index 0000000000..a7ce870238 --- /dev/null +++ b/cmd/spiderpool-controller/cmd/clean.go @@ -0,0 +1,310 @@ +// Copyright 2022 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package cmd + +import ( + "context" + "os" + "reflect" + "strings" + + "github.com/hashicorp/go-multierror" + "github.com/spf13/cobra" + "github.com/spidernet-io/spiderpool/pkg/constant" + spiderpoolv2beta1 "github.com/spidernet-io/spiderpool/pkg/k8s/apis/spiderpool.spidernet.io/v2beta1" + "github.com/spidernet-io/spiderpool/pkg/k8s/utils" + webhook "k8s.io/api/admissionregistration/v1" + batchv1 "k8s.io/api/batch/v1" + apiextensionsv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + 
+var cleanCmd = &cobra.Command{ + Use: "clean", + Short: "Clean resources", + Long: "Clean resources with specified parameters.", + Run: func(cmd *cobra.Command, args []string) { + + validate, err := cmd.Flags().GetString("validate") + if err != nil { + logger.Fatal(err.Error()) + os.Exit(1) + } + mutating, err := cmd.Flags().GetString("mutating") + if err != nil { + logger.Fatal(err.Error()) + os.Exit(1) + } + logger.Sugar().Infof("validate %s\nmutating %s\n", validate, mutating) + + client, err := NewCoreClient() + if err != nil { + logger.Fatal(err.Error()) + os.Exit(1) + } + err = client.clean(validate, mutating) + if err != nil { + logger.Fatal(err.Error()) + os.Exit(1) + } + }, +} + +const ( + ENVNamespace = "SPIDERPOOL_POD_NAMESPACE" + ENVSpiderpoolInitName = "SPIDERPOOL_INIT_NAME" +) + +type CoreClient struct { + client.Client +} + +func NewCoreClient() (*CoreClient, error) { + c, err := client.New( + ctrl.GetConfigOrDie(), + client.Options{Scheme: scheme}, + ) + if err != nil { + return nil, err + } + + return &CoreClient{Client: c}, nil +} + +func (c *CoreClient) clean(validate, mutating string) error { + var jobResult *multierror.Error + ctx := context.Background() + + // Clean up MutatingWebhookConfiguration resources of spiderpool + if err := c.cleanWebhookResources(ctx, constant.MutatingWebhookConfiguration, mutating, &webhook.MutatingWebhookConfiguration{}); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SriovNetworkResourcesInjectorMutating resources of sriov-network-operator + if err := c.cleanWebhookResources(ctx, constant.MutatingWebhookConfiguration, constant.SriovNetworkResourcesInjectorMutating, &webhook.MutatingWebhookConfiguration{}); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up sriov-operator-webhook-config resources of sriov-network-operator + if err := c.cleanWebhookResources(ctx, constant.MutatingWebhookConfiguration, 
constant.SriovOperatorWebhookConfigMutatingOrValidate, &webhook.MutatingWebhookConfiguration{}); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up ValidatingWebhookConfiguration resources of spiderpool + if err := c.cleanWebhookResources(ctx, constant.ValidatingWebhookConfiguration, validate, &webhook.ValidatingWebhookConfiguration{}); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up sriov-operator-webhook-config resources of sriov-network-operator + if err := c.cleanWebhookResources(ctx, constant.ValidatingWebhookConfiguration, constant.SriovOperatorWebhookConfigMutatingOrValidate, &webhook.ValidatingWebhookConfiguration{}); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderIPPool resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderIPPoolList{}, constant.KindSpiderIPPool); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderSubnet resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderSubnetList{}, constant.KindSpiderSubnet); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderEndpoint resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderEndpointList{}, constant.KindSpiderEndpoint); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderReservedIP resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderReservedIPList{}, constant.KindSpiderReservedIP); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderMultusConfig resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderMultusConfigList{}, constant.KindSpiderMultusConfig); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderCoordinator resources of 
spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderCoordinatorList{}, constant.KindSpiderCoordinator); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Clean up SpiderClaimParameter resources of spiderpool + if err := c.cleanSpiderpoolResources(ctx, &spiderpoolv2beta1.SpiderClaimParameterList{}, constant.KindSpiderClaimParameter); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Delete all crds of spiderpool or sriov-network-operator + if err := c.cleanCRDs(ctx); err != nil { + jobResult = multierror.Append(jobResult, err) + } + + // Delete Job of spiderpool-Init + spiderpoolInitNamespace := strings.ReplaceAll(os.Getenv(ENVNamespace), "\"", "") + if len(spiderpoolInitNamespace) == 0 { + logger.Sugar().Errorf("Tried to clean up spiderpool-init job, but ENV %s %v", ENVNamespace, constant.ErrMissingRequiredParam) + } + spiderpoolInitName := strings.ReplaceAll(os.Getenv(ENVSpiderpoolInitName), "\"", "") + if len(spiderpoolInitName) == 0 { + logger.Sugar().Errorf("Tried to clean up spiderpool-init job, but ENV %s %v", ENVSpiderpoolInitName, constant.ErrMissingRequiredParam) + } + + if len(spiderpoolInitName) != 0 && len(spiderpoolInitNamespace) != 0 { + err := c.cleanSpiderpoolInitJob(ctx, spiderpoolInitNamespace, spiderpoolInitName) + jobResult = multierror.Append(jobResult, err) + } + + return jobResult.ErrorOrNil() +} + +// cleanWebhookResources deletes a specific webhook configuration based on the provided resource type and name. 
+func (c *CoreClient) cleanWebhookResources(ctx context.Context, resourceType, resourceName string, obj client.Object) error { + err := utils.DeleteWebhookConfiguration(ctx, c, resourceName, obj) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Sugar().Infof("%s: %s does not exist, error: %v", resourceType, resourceName, err) + return nil + } + + logger.Sugar().Errorf("failed to delete %s: %s, error: %v", resourceType, resourceName, err) + return err + } + + logger.Sugar().Infof("succeeded to delete %s: %s", resourceType, resourceName) + return nil +} + +// cleanSpiderpoolResources lists and deletes specific Spiderpool resources, with an optional finalizer cleanup step. +func (c *CoreClient) cleanSpiderpoolResources(ctx context.Context, list client.ObjectList, resourceName string) error { + var jobResult *multierror.Error + err := c.List(ctx, list) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Sugar().Infof("%s does not exist, error: %v", resourceName, err) + return nil + } + logger.Sugar().Errorf("failed to list %s, error: %v", resourceName, err) + return err + } + + items := reflect.ValueOf(list).Elem().FieldByName("Items") + for i := 0; i < items.Len(); i++ { + item := items.Index(i).Addr().Interface().(client.Object) + + cleanFinalizers := false + switch resourceName { + case constant.KindSpiderIPPool: + cleanFinalizers = true + case constant.KindSpiderEndpoint: + cleanFinalizers = true + case constant.KindSpiderCoordinator: + cleanFinalizers = true + default: + cleanFinalizers = false + } + + if cleanFinalizers { + item.SetFinalizers(nil) + err = c.Update(ctx, item) + if err != nil { + logger.Sugar().Errorf("failed to clean the finalizers of %s: %v, %v", resourceName, item.GetName(), err) + jobResult = multierror.Append(jobResult, err) + continue + } + logger.Sugar().Infof("succeeded to clean the finalizers of %s %v", resourceName, item.GetName()) + } + err = c.Delete(ctx, item) + if err != nil { + if apierrors.IsNotFound(err) { + 
logger.Sugar().Errorf("%s: %v does not exist, error: %v", resourceName, item.GetName(), err) + continue + } + logger.Sugar().Errorf("failed to delete %s: %v, %v", resourceName, item.GetName(), err) + jobResult = multierror.Append(jobResult, err) + continue + } + logger.Sugar().Infof("succeeded to delete %s: %v", resourceName, item.GetName()) + } + + return jobResult.ErrorOrNil() +} + +// cleanCRDs lists and deletes CustomResourceDefinitions (CRDs) related to Spiderpool and sriov-network-operator. +func (c *CoreClient) cleanCRDs(ctx context.Context) error { + var jobResult *multierror.Error + crdList := &apiextensionsv1.CustomResourceDefinitionList{} + err := c.List(ctx, crdList) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Sugar().Infof("CustomResourceDefinitionList does not exist, error: %v", err) + return nil + } + logger.Sugar().Errorf("failed to list CustomResourceDefinitionList, error: %v", err) + return err + } + + for _, item := range crdList.Items { + cleanCRD := false + switch item.Spec.Group { + case constant.SpiderpoolAPIGroup: + cleanCRD = true + // Delete all crds of sriov-network-operator + // After sriov-network-operator was uninstalled, sriov-network-operator did not delete its own CRD, + // and there were residual CRDs, which might bring some hidden dangers to the upgrade of sriov-network-operator; + // we tried to uninstall it through spiderpool. + case constant.SriovNetworkOperatorAPIGroup: + // After helm uninstall, sriov-operator will delete the resources under sriovoperatorconfigs.sriovnetwork.openshift.io. + // If we delete this CRD resource in advance, helm uninstall will report an error. + // We will skip it for now to allow other resources to be deleted. 
+ if item.Name == constant.SriovNetworkOperatorConfigs { + cleanCRD = false + } else { + cleanCRD = true + } + default: + cleanCRD = false + } + + if cleanCRD { + err = c.Delete(ctx, &item) + if err != nil { + logger.Sugar().Errorf("failed to delete CRD: %v, %v", item.Name, err) + jobResult = multierror.Append(jobResult, err) + continue + } + logger.Sugar().Infof("succeeded to delete CRD: %v", item.Name) + } + } + + return jobResult.ErrorOrNil() +} + +// cleanSpiderpoolInitJob deletes the spiderpool-init Job, logs any errors or success. +func (c *CoreClient) cleanSpiderpoolInitJob(ctx context.Context, spiderpoolInitNamespace, spiderpoolInitName string) error { + spiderpoolInitJob := &batchv1.Job{} + err := c.Get(ctx, types.NamespacedName{Namespace: spiderpoolInitNamespace, Name: spiderpoolInitName}, spiderpoolInitJob) + if err != nil { + if apierrors.IsNotFound(err) { + logger.Sugar().Infof("spiderpool-init Job %s/%s does not exist, error: %v", spiderpoolInitNamespace, spiderpoolInitName, err) + return nil + } + logger.Sugar().Errorf("failed to get spiderpool-init Job %s/%s, error: %v", spiderpoolInitNamespace, spiderpoolInitName, err) + return err + } + + propagationPolicy := metav1.DeletePropagationBackground + err = c.Delete(ctx, spiderpoolInitJob, &client.DeleteOptions{PropagationPolicy: &propagationPolicy}) + if err != nil { + logger.Sugar().Errorf("failed to delete spiderpool-init Job: %v/%v, %v", spiderpoolInitJob.Namespace, spiderpoolInitJob.Name, err) + return err + } + logger.Sugar().Infof("succeeded to delete spiderpool-init Job: %v/%v", spiderpoolInitJob.Namespace, spiderpoolInitJob.Name) + + return nil +} diff --git a/pkg/k8s/utils/utils.go b/pkg/k8s/utils/utils.go new file mode 100644 index 0000000000..602ed18f69 --- /dev/null +++ b/pkg/k8s/utils/utils.go @@ -0,0 +1,24 @@ +// Copyright 2024 Authors of spidernet-io +// SPDX-License-Identifier: Apache-2.0 + +package utils + +import ( + "context" + + "sigs.k8s.io/controller-runtime/pkg/client" +) + +func 
DeleteWebhookConfiguration(ctx context.Context, c client.Client, name string, obj client.Object) error { + err := c.Get(ctx, client.ObjectKey{Name: name}, obj) + if err != nil { + return err + } + + err = c.Delete(ctx, obj) + if err != nil { + return err + } + + return nil +} diff --git a/test/scripts/debugEnv.sh b/test/scripts/debugEnv.sh index ef9e856a24..4dd501a52b 100755 --- a/test/scripts/debugEnv.sh +++ b/test/scripts/debugEnv.sh @@ -24,6 +24,9 @@ COMPONENT_PS_PROCESS_MAX=50 CONTROLLER_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace ${NAMESPACE} --selector app.kubernetes.io/component=spiderpool-controller --output jsonpath={.items[*].metadata.name} ) AGENT_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace ${NAMESPACE} --selector app.kubernetes.io/component=spiderpool-agent --output jsonpath={.items[*].metadata.name} ) KUBEVIRT_HANDLER_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace kubevirt --selector kubevirt.io=virt-handler --output jsonpath={.items[*].metadata.name} ) +KDOCTOR_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace ${NAMESPACE} --selector app.kubernetes.io/instance=kdoctor --output jsonpath={.items[*].metadata.name} ) +KRUISE_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace kruise-system --output jsonpath={.items[*].metadata.name} ) +SPIDERPOOL_UNINSTALL_POD_LIST=$( kubectl get pods --no-headers --kubeconfig ${E2E_KUBECONFIG} --namespace ${NAMESPACE} -l job-name=spiderpool-controller-hook-pre-delete --output jsonpath={.items[*].metadata.name} ) [ -z "$CONTROLLER_POD_LIST" ] && echo "error, failed to find any spider controller pod" && exit 1 [ -z "$AGENT_POD_LIST" ] && echo "error, failed to find any spider agent pod" && exit 1 @@ -78,8 +81,12 @@ elif [ 
"$TYPE"x == "detail"x ] ; then echo "${ERROR_POD}" for LINE in ${ERROR_POD}; do NS_NAME=${LINE//,/ } - echo "---------------error pod: ${NS_NAME}------------" + echo "---------------describe error pod: ${NS_NAME}------------" kubectl describe pod -n ${NS_NAME} --kubeconfig ${E2E_KUBECONFIG} + echo "---------------logs error pod: ${NS_NAME}------------" + kubectl logs -n ${NS_NAME} --kubeconfig ${E2E_KUBECONFIG} + echo "---------------logs error pod: ${NS_NAME} --previous------------" + kubectl logs -n ${NS_NAME} --kubeconfig ${E2E_KUBECONFIG} --previous done fi @@ -130,6 +140,16 @@ elif [ "$TYPE"x == "detail"x ] ; then echo "--------- kubectl logs -l job-name=spiderpool-init -n ${NAMESPACE} " kubectl logs -l job-name=spiderpool-init -n ${NAMESPACE} --kubeconfig ${E2E_KUBECONFIG} + echo "" + echo "=============== spiderpool-uninstall pod logs ============== " + for POD in $SPIDERPOOL_UNINSTALL_POD_LIST ; do + echo "" + echo "---------kubectl logs ${POD} -n ${NAMESPACE} " + kubectl logs ${POD} -n ${NAMESPACE} --kubeconfig ${E2E_KUBECONFIG} + echo "--------- kubectl logs ${POD} -n ${NAMESPACE} --previous" + kubectl logs ${POD} -n ${NAMESPACE} --kubeconfig ${E2E_KUBECONFIG} --previous + done + echo "" echo "=============== spiderpool crd spiderippool ============== " echo "--------- kubectl get spiderippool -o wide"