diff --git a/changelogs/unreleased/517-akhilerm b/changelogs/unreleased/517-akhilerm new file mode 100644 index 000000000..1c7813033 --- /dev/null +++ b/changelogs/unreleased/517-akhilerm @@ -0,0 +1 @@ +fix a bug causing blockdevice resources not to be created for all the disks if one of the disk goes bad \ No newline at end of file diff --git a/cmd/ndm_daemonset/probe/eventhandler.go b/cmd/ndm_daemonset/probe/eventhandler.go index 8997cb9a5..d15f05132 100644 --- a/cmd/ndm_daemonset/probe/eventhandler.go +++ b/cmd/ndm_daemonset/probe/eventhandler.go @@ -17,9 +17,11 @@ limitations under the License. package probe import ( + "github.com/openebs/node-disk-manager/blockdevice" "github.com/openebs/node-disk-manager/cmd/ndm_daemonset/controller" "github.com/openebs/node-disk-manager/pkg/features" libudevwrapper "github.com/openebs/node-disk-manager/pkg/udev" + "github.com/openebs/node-disk-manager/pkg/util" "k8s.io/klog" ) @@ -51,6 +53,8 @@ func (pe *ProbeEvent) addBlockDeviceEvent(msg controller.EventMessage) { isGPTBasedUUIDEnabled := features.FeatureGates.IsEnabled(features.GPTBasedUUID) isErrorDuringUpdate := false + erroredDevices := make([]string, 0) + // iterate through each block device and perform the add/update operation for _, device := range msg.Devices { klog.Infof("Processing details for %s", device.DevPath) @@ -62,12 +66,15 @@ func (pe *ProbeEvent) addBlockDeviceEvent(msg controller.EventMessage) { klog.Infof("Processed details for %s", device.DevPath) if isGPTBasedUUIDEnabled { + if isParentOrSlaveDevice(*device, erroredDevices) { + klog.Warningf("device: %s skipped, because the parent / slave device has errored", device.DevPath) + continue + } err := pe.addBlockDevice(*device, bdAPIList) if err != nil { isErrorDuringUpdate = true + erroredDevices = append(erroredDevices, device.DevPath) klog.Error(err) - // if error occurs we should start the scan again - break } } else { // if GPTBasedUUID is disabled and the device type is partition, @@ -121,3 +128,17 @@ func (pe *ProbeEvent) deleteBlockDeviceEvent(msg controller.EventMessage) { go Rescan(pe.Controller) } } + +// isParentOrSlaveDevice check if any of the errored device is a parent / slave to the +// given blockdevice +func isParentOrSlaveDevice(bd blockdevice.BlockDevice, erroredDevices []string) bool { + for _, erroredDevice := range erroredDevices { + if bd.DependentDevices.Parent == erroredDevice { + return true + } + if util.Contains(bd.DependentDevices.Slaves, erroredDevice) { + return true + } + } + return false +} diff --git a/cmd/ndm_daemonset/probe/eventhandler_test.go b/cmd/ndm_daemonset/probe/eventhandler_test.go index 34e586abd..7615b7663 100644 --- a/cmd/ndm_daemonset/probe/eventhandler_test.go +++ b/cmd/ndm_daemonset/probe/eventhandler_test.go @@ -251,3 +251,78 @@ func compareBlockDeviceList(t *testing.T, bdList1, bdList2 apis.BlockDeviceList) compareBlockDevice(t, bdList1.Items[i], bdList2.Items[i]) } } + +func TestIsParentOrSlaveDevice(t *testing.T) { + tests := map[string]struct { + bd blockdevice.BlockDevice + erroredDevices []string + want bool + }{ + "no devices in errored state": { + bd: blockdevice.BlockDevice{ + Identifier: blockdevice.Identifier{ + DevPath: "/dev/sda1", + }, + DependentDevices: blockdevice.DependentBlockDevices{ + Parent: "/dev/sda", + Partitions: nil, + Holders: nil, + Slaves: nil, + }, + }, + erroredDevices: nil, + want: false, + }, + "multiple devices in errored state with no matching BD": { + bd: blockdevice.BlockDevice{ + Identifier: blockdevice.Identifier{ + DevPath: "/dev/sda1", + }, + DependentDevices: blockdevice.DependentBlockDevices{ + Parent: "/dev/sda", + Partitions: nil, + Holders: nil, + Slaves: nil, + }, + }, + erroredDevices: []string{"/dev/sdb", "/dev/sdc"}, + want: false, + }, + "one device in errored state that is the parent of the given BD": { + bd: blockdevice.BlockDevice{ + Identifier: blockdevice.Identifier{ + DevPath: "/dev/sda1", + }, + DependentDevices: blockdevice.DependentBlockDevices{ + Parent: "/dev/sda", + Partitions: nil, + Holders: nil, + Slaves: nil, + }, + }, + erroredDevices: []string{"/dev/sda"}, + want: true, + }, + "one device in errored state that that the BD is a slave to": { + bd: blockdevice.BlockDevice{ + Identifier: blockdevice.Identifier{ + DevPath: "/dev/dm-0", + }, + DependentDevices: blockdevice.DependentBlockDevices{ + Parent: "", + Partitions: nil, + Holders: nil, + Slaves: []string{"/dev/sda", "/dev/sdb"}, + }, + }, + erroredDevices: []string{"/dev/sda", "/dev/sdc"}, + want: true, + }, + } + for name, tt := range tests { + t.Run(name, func(t *testing.T) { + got := isParentOrSlaveDevice(tt.bd, tt.erroredDevices) + assert.Equal(t, tt.want, got) + }) + } +}