From e8902e2b7c6c77d9eeca81faff97436ae4136af1 Mon Sep 17 00:00:00 2001 From: vakul-gupta-flp <92844305+vakul-gupta-flp@users.noreply.github.com> Date: Tue, 14 Dec 2021 12:11:49 +0530 Subject: [PATCH] fix(volume): Fix Error Handling On Creation Failure (#51) - Persistent volume provision failure handling Co-authored-by: Vakul Gupta Co-authored-by: Vakul Gupta Co-authored-by: Vakul Gupta --- .github/workflows/build.yml | 4 +- .github/workflows/pull_request.yml | 4 +- deploy/device-operator.yaml | 1 + deploy/yamls/devicevolume-crd.yaml | 1 + .../device/v1alpha1/devicevolume.go | 2 +- .../device/v1alpha1/devicevolumefunctions.go | 23 ++++++ pkg/device/device-util.go | 5 +- pkg/device/volume.go | 14 ++-- pkg/mgmt/volume/volume.go | 22 ++++-- tests/provision_test.go | 7 ++ tests/suite_test.go | 4 ++ tests/utils.go | 72 +++++++++++++++++++ 12 files changed, 143 insertions(+), 16 deletions(-) create mode 100644 pkg/apis/openebs.io/device/v1alpha1/devicevolumefunctions.go diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index d9be3a52..97e372bd 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -65,10 +65,10 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Set up Go 1.14 + - name: Set up Go 1.16 uses: actions/setup-go@v2 with: - go-version: 1.14.7 + go-version: 1.16.5 - name: Setup Minikube-Kubernetes uses: manusa/actions-setup-minikube@v2.3.0 diff --git a/.github/workflows/pull_request.yml b/.github/workflows/pull_request.yml index 36e37bd8..1c86db7f 100644 --- a/.github/workflows/pull_request.yml +++ b/.github/workflows/pull_request.yml @@ -72,10 +72,10 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Set up Go 1.14 + - name: Set up Go 1.16 uses: actions/setup-go@v2 with: - go-version: 1.14.7 + go-version: 1.16.5 - name: Setup Minikube-Kubernetes uses: manusa/actions-setup-minikube@v2.3.0 diff --git a/deploy/device-operator.yaml b/deploy/device-operator.yaml index e68e2854..71be1fde 100644 --- a/deploy/device-operator.yaml +++ b/deploy/device-operator.yaml @@ -130,6 +130,7 @@ spec: enum: - Pending - Ready + - Failed type: string type: object required: diff --git a/deploy/yamls/devicevolume-crd.yaml b/deploy/yamls/devicevolume-crd.yaml index e4d4e1d4..0feb91b5 100644 --- a/deploy/yamls/devicevolume-crd.yaml +++ b/deploy/yamls/devicevolume-crd.yaml @@ -109,6 +109,7 @@ spec: enum: - Pending - Ready + - Failed type: string type: object required: diff --git a/pkg/apis/openebs.io/device/v1alpha1/devicevolume.go b/pkg/apis/openebs.io/device/v1alpha1/devicevolume.go index 31cc8817..65972f33 100644 --- a/pkg/apis/openebs.io/device/v1alpha1/devicevolume.go +++ b/pkg/apis/openebs.io/device/v1alpha1/devicevolume.go @@ -77,7 +77,7 @@ type VolStatus struct { // The state "Pending" means that the volume creation request has not // processed yet. The state "Ready" means that the volume has been created // and it is ready for the use. - // +kubebuilder:validation:Enum=Pending;Ready + // +kubebuilder:validation:Enum=Pending;Ready;Failed State string `json:"state,omitempty"` // Error denotes the error occurred during provisioning a volume. diff --git a/pkg/apis/openebs.io/device/v1alpha1/devicevolumefunctions.go b/pkg/apis/openebs.io/device/v1alpha1/devicevolumefunctions.go new file mode 100644 index 00000000..719c4141 --- /dev/null +++ b/pkg/apis/openebs.io/device/v1alpha1/devicevolumefunctions.go @@ -0,0 +1,23 @@ +/* +Copyright © 2019 The OpenEBS Authors + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package v1alpha1 + +import "fmt" + +func (e *VolumeError) Error() string { + return fmt.Sprintf("%s - %s", e.Code, e.Message) +} diff --git a/pkg/device/device-util.go b/pkg/device/device-util.go index 5fc35267..46095feb 100644 --- a/pkg/device/device-util.go +++ b/pkg/device/device-util.go @@ -159,7 +159,10 @@ func CreateVolume(vol *apis.DeviceVolume) error { disk, start, err := findBestPart(diskMetaName, capacityMiB) if err != nil { klog.Errorf("findBestPart Failed") - return err + return &apis.VolumeError{ + Code: apis.InsufficientCapacity, + Message: err.Error(), + } } return createPartAndWipeFS(disk, start, partitionName, capacityMiB, diskMetaName) } diff --git a/pkg/device/volume.go b/pkg/device/volume.go index cb84b819..20694c56 100644 --- a/pkg/device/volume.go +++ b/pkg/device/volume.go @@ -129,17 +129,21 @@ func GetDeviceVolumeState(volID string) (string, string, error) { } // UpdateVolInfo updates DeviceVolume CR with node id and finalizer -func UpdateVolInfo(vol *apis.DeviceVolume) error { - finalizers := []string{DeviceFinalizer} - labels := map[string]string{DeviceNodeKey: NodeID} - +func UpdateVolInfo(vol *apis.DeviceVolume, state string) error { + klog.Infof("Upadting the DeviceVol status to : %s", state) if vol.Finalizers != nil { return nil } + var finalizers []string + labels := map[string]string{DeviceNodeKey: NodeID} + switch state { + case DeviceStatusReady: + finalizers = append(finalizers, DeviceFinalizer) + } newVol, err := volbuilder.BuildFrom(vol). WithFinalizer(finalizers). - WithVolumeStatus(DeviceStatusReady). + WithVolumeStatus(state). WithLabels(labels).Build() if err != nil { diff --git a/pkg/mgmt/volume/volume.go b/pkg/mgmt/volume/volume.go index e45af91d..b7945821 100644 --- a/pkg/mgmt/volume/volume.go +++ b/pkg/mgmt/volume/volume.go @@ -84,13 +84,25 @@ func (c *VolController) syncVol(vol *apis.DeviceVolume) error { } return err } - // if finalizer is not set then it means we are creating - // the volume. And if it is set then volume has already been - // created and this event is for property change only. - if vol.Status.State != device.DeviceStatusReady { + + // if status is not Pending then we are just ignoring the event. + switch vol.Status.State { + case device.DeviceStatusFailed: + klog.Warningf("Skipping retrying device volume provisioning as its already in failed state: %+v", vol.Status.Error) + return nil + case device.DeviceStatusReady: + klog.Info("device volume already provisioned") + return nil + } + + // if the status Pending means we will try to create the volume + if vol.Status.State == device.DeviceStatusPending { err = device.CreateVolume(vol) if err == nil { - err = device.UpdateVolInfo(vol) + err = device.UpdateVolInfo(vol, device.DeviceStatusReady) + } else if custError, ok := err.(*apis.VolumeError); ok && custError.Code == apis.InsufficientCapacity { + vol.Status.Error = custError + return device.UpdateVolInfo(vol, device.DeviceStatusFailed) } } return err diff --git a/tests/provision_test.go b/tests/provision_test.go index 29c50d16..d2c80c3d 100644 --- a/tests/provision_test.go +++ b/tests/provision_test.go @@ -57,7 +57,14 @@ func blockVolCreationTest() { By("Deleting storage class", deleteStorageClass) } +func volCreationFailTest() { + By("Create and Verify DeviceVol resource", CreateDeviceVolume) + By("Checking the state of DeviceVol", IsDeviceVolStatusFailedEventually) + By("Deleting DeviceVol", deleteDeviceVol) +} + func volumeCreationTest() { By("Running volume creation test", fsVolCreationTest) By("Running block volume creation test", blockVolCreationTest) + By("Running the volume creation failed test", volCreationFailTest) } diff --git a/tests/suite_test.go b/tests/suite_test.go index 3401e525..57af1e17 100644 --- a/tests/suite_test.go +++ b/tests/suite_test.go @@ -22,6 +22,7 @@ import ( . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" + "github.com/openebs/device-localpv/pkg/builder/nodebuilder" "github.com/openebs/device-localpv/pkg/builder/volbuilder" "github.com/openebs/device-localpv/tests/deploy" "github.com/openebs/device-localpv/tests/pod" @@ -43,6 +44,7 @@ const ( var ( DeviceClient *volbuilder.Kubeclient + NodeClient *nodebuilder.Kubeclient SCClient *sc.Kubeclient PVCClient *pvc.Kubeclient DeployClient *deploy.Kubeclient @@ -52,6 +54,7 @@ var ( LocalProvisioner = "device.csi.openebs.io" pvcName = "devicepv-pvc" appName = "busybox-devicepv" + DeviceVolName = "pvc-123" nsObj *corev1.Namespace scObj *storagev1.StorageClass @@ -76,6 +79,7 @@ func init() { DeployClient = deploy.NewKubeClient(deploy.WithKubeConfigPath(KubeConfigPath)) PodClient = pod.NewKubeClient(pod.WithKubeConfigPath(KubeConfigPath)) DeviceClient = volbuilder.NewKubeclient(volbuilder.WithKubeConfigPath(KubeConfigPath)) + NodeClient = nodebuilder.NewKubeclient(nodebuilder.WithKubeConfigPath(KubeConfigPath)) } func TestSource(t *testing.T) { diff --git a/tests/utils.go b/tests/utils.go index 50e85eed..3fa002cb 100644 --- a/tests/utils.go +++ b/tests/utils.go @@ -22,6 +22,7 @@ import ( "github.com/onsi/ginkgo" "github.com/onsi/gomega" + "github.com/openebs/device-localpv/pkg/builder/volbuilder" "github.com/openebs/device-localpv/pkg/device" "github.com/openebs/device-localpv/tests/container" "github.com/openebs/device-localpv/tests/deploy" @@ -398,3 +399,74 @@ func deletePVC(pvcname string) { status := IsPVCDeletedEventually(pvcname) gomega.Expect(status).To(gomega.Equal(true), "while trying to get deleted pvc") } + +// IsDeviceVolDeletedEventually : After deleted check if resource still exists +func IsDeviceVolDeletedEventually() bool { + ginkgo.By("Fetching device volume to check if its deleted") + return gomega.Eventually(func() bool { + _, err := DeviceClient.WithNamespace(OpenEBSNamespace).Get(DeviceVolName, metav1.GetOptions{}) + return k8serrors.IsNotFound(err) + }, + 120, 5). + Should(gomega.BeTrue()) + +} + +// CreateDeviceVolume : Create new DeviceVol resource +func CreateDeviceVolume() { + + nodeList, err := NodeClient.List(metav1.ListOptions{}) + gomega.Expect(err).To(gomega.BeNil(), "while retrieving DeviceNode") + gomega.Expect(len(nodeList.Items) > 0).To(gomega.BeTrue(), "while retrieving DeviceNode") + + ginkgo.By("Building DeviceVol") + capacity := "109951162777600" // 100Ti + volObj, err := volbuilder.NewBuilder(). + WithName(DeviceVolName). + WithCapacity(capacity). + WithDeviceName(DeviceMetaName). + WithOwnerNode(nodeList.Items[0].ObjectMeta.Name). + WithVolumeStatus(device.DeviceStatusPending).Build() + + ginkgo.By("Creating the above DeviceVol") + volObj, err = DeviceClient.WithNamespace(OpenEBSNamespace).Create(volObj) + gomega.Expect(err).To( + gomega.BeNil(), + "while creating DeviceVol {%s} in namespace {%s}", + DeviceVolName, + OpenEBSNamespace, + ) + + _, err = DeviceClient.WithNamespace(OpenEBSNamespace).Get(DeviceVolName, metav1.GetOptions{}) + gomega.Expect(err).To( + gomega.BeNil(), + "while retrieving DeviceVol {%s} in namespace {%s}", + DeviceVolName, + OpenEBSNamespace, + ) +} + +// IsDeviceVolStatusFailedEventually : Checks for the DeviceVol status and waits till it becomes failed +func IsDeviceVolStatusFailedEventually() { + status := gomega.Eventually(func() bool { + deviceVol, _ := DeviceClient.WithNamespace(OpenEBSNamespace).Get(DeviceVolName, metav1.GetOptions{}) + return deviceVol.Status.State == device.DeviceStatusFailed + }, + 120, 5). + Should(gomega.BeTrue()) + + gomega.Expect(status).To(gomega.Equal(true), "while checking the status of DeviceVol") + ginkgo.By("DeviceVol is in failed state") +} + +func deleteDeviceVol() { + err := DeviceClient.WithNamespace(OpenEBSNamespace).Delete(DeviceVolName) + gomega.Expect(err).To( + gomega.BeNil(), + "while deleting deviceVol {%s} in namespace {%s}", + DeviceVolName, + OpenEBSNamespace, + ) + status := IsDeviceVolDeletedEventually() + gomega.Expect(status).To(gomega.Equal(true), "while trying to get deleted DeviceVol") +}