diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index f8f0e9fb0..b1ececc8f 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -71,7 +71,7 @@ func (p *GenericPlugin) OnNodeStateChange(new *sriovnetworkv1.SriovNetworkNodeSt p.DesireState = new needDrain = needDrainNode(new.Spec.Interfaces, new.Status.Interfaces) - needReboot = p.needRebootNode(new) + needReboot, err = p.needRebootNode(new) if needReboot { needDrain = true @@ -202,30 +202,34 @@ func (p *GenericPlugin) AddToDesiredKernelParams(kparam string) { } } -// SetAllDesiredKernelParams Should be called to set all the kernel parameters. Returns true if reboot of the node is needed. -func (p *GenericPlugin) SetAllDesiredKernelParams() bool { - needReboot := false +// SetAllDesiredKernelParams Should be called to set all the kernel parameters. Updates needReboot if reboot is needed. +func (p *GenericPlugin) SetAllDesiredKernelParams(needReboot *bool) error { for kparam, attempts := range p.DesiredKernelParams { - if !utils.IsKernelCmdLineParamSet(kparam, false) { - if attempts > 0 && attempts <= 4 { - glog.Errorf("generic-plugin SetAllDesiredKernelParams(): Fail to set kernel param %s after reboot with attempts %d", kparam, attempts) - } else if attempts > 4 { - // If we tried several times and was unsuccessful, we should give up. - continue + set, err := utils.IsKernelCmdLineParamSet(kparam, false) + if err != nil { + return err + } + if !set { + if attempts > 0 { + glog.Errorf("generic-plugin SetAllDesiredKernelParams(): failed to set kernel param %s with attempts %d", kparam, attempts) } + // There is a case when we try to set the kernel parameter here, the daemon could decide to not reboot because + // the daemon encountered a potentially one-time error. However we always want to make sure that the kernel + // parameter is set once the daemon goes through node state sync again. update, err := trySetKernelParam(kparam) if err != nil { - glog.Errorf("generic-plugin SetAllDesiredKernelParams(): Fail to set kernel param %s: %v", kparam, err) + glog.Errorf("generic-plugin SetAllDesiredKernelParams(): fail to set kernel param %s: %v", kparam, err) + return err } if update { - glog.V(2).Infof("generic-plugin SetAllDesiredKernelParams(): Need reboot for setting kernel param %s", kparam) + glog.V(2).Infof("generic-plugin SetAllDesiredKernelParams(): need reboot for setting kernel param %s", kparam) } // Update the number of attempts we tried to set the kernel parameter. p.DesiredKernelParams[kparam]++ - needReboot = needReboot || update + *needReboot = *needReboot || update } } - return needReboot + return nil } func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.InterfaceExts) (needDrain bool) { @@ -257,8 +261,8 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int return } -func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (needReboot bool) { - needReboot = false +func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeState) (bool, error) { + needReboot := false if p.LoadVfioDriver != loaded { if needVfioDriver(state) { p.LoadVfioDriver = loading @@ -273,13 +277,20 @@ func (p *GenericPlugin) needRebootNode(state *sriovnetworkv1.SriovNetworkNodeSta } } + err := p.SetAllDesiredKernelParams(&needReboot) + if err != nil { + glog.Errorf("generic-plugin needRebootNode(): failed to set the desired kernel parameters") + return false, err + } + update, err := utils.WriteSwitchdevConfFile(state) if err != nil { glog.Errorf("generic-plugin needRebootNode(): fail to write switchdev device config file") + return false, err } if update { glog.V(2).Infof("generic-plugin needRebootNode(): need reboot for updating switchdev device configuration") } - needReboot = needReboot || update || p.SetAllDesiredKernelParams() - return + needReboot = needReboot || update + return needReboot, nil } diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 45e6381b3..d0c7b4bc3 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -32,7 +32,7 @@ const ( sysBusPciDrivers = "/sys/bus/pci/drivers" sysBusPciDriversProbe = "/sys/bus/pci/drivers_probe" sysClassNet = "/sys/class/net" - sysKernelCmdLine = "/proc/cmdline" + procKernelCmdLine = "/proc/cmdline" netClass = 0x02 numVfsFile = "sriov_numvfs" @@ -57,24 +57,20 @@ func init() { } // IsKernelCmdLineParamSet This checks if the kernel cmd line is set properly. -func IsKernelCmdLineParamSet(param string, chroot bool) bool { - path := sysKernelCmdLine +func IsKernelCmdLineParamSet(param string, chroot bool) (bool, error) { + path := procKernelCmdLine if !chroot { path = "/host" + path } cmdLine, err := os.ReadFile(path) if err != nil { - glog.Warningf("IsKernelCmdLineParamSet(): Error reading %s, Please check the Kernel Params manually!", sysKernelCmdLine) - // Although intuitively we should report false here. However if we cannot reliably read the Kernel's cmd line params - // then it is unknown whether it was successfully set, and to avoid unnecessary reboots we should just post a warning - // and return true. - return true + return false, fmt.Errorf("IsKernelCmdLineParamSet(): Error reading %s: %v", procKernelCmdLine, err) } if strings.Contains(string(cmdLine), param) { - return true + return true, nil } - return false + return false, nil } func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, error) {