From 801845bf48a1a38bc616281343b518df02c6e94c Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Wed, 18 Oct 2023 17:54:15 +0300 Subject: [PATCH 1/2] Add FeatureGates field into SriovOperatorConfig CRD FeatureGates is a map to enable experimental features. --- api/v1/sriovoperatorconfig_types.go | 2 ++ api/v1/zz_generated.deepcopy.go | 7 +++++++ .../sriovnetwork.openshift.io_sriovoperatorconfigs.yaml | 5 +++++ .../sriovnetwork.openshift.io_sriovoperatorconfigs.yaml | 5 +++++ 4 files changed, 19 insertions(+) diff --git a/api/v1/sriovoperatorconfig_types.go b/api/v1/sriovoperatorconfig_types.go index 27417ce11..25893e7fa 100644 --- a/api/v1/sriovoperatorconfig_types.go +++ b/api/v1/sriovoperatorconfig_types.go @@ -63,6 +63,8 @@ type SriovOperatorConfigSpec struct { UseCDI bool `json:"useCDI,omitempty"` // DisablePlugins is a list of sriov-network-config-daemon plugins to disable DisablePlugins PluginNameSlice `json:"disablePlugins,omitempty"` + // FeatureGates to enable experimental features + FeatureGates map[string]bool `json:"featureGates,omitempty"` } // SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 38b027179..7465737f4 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -749,6 +749,13 @@ func (in *SriovOperatorConfigSpec) DeepCopyInto(out *SriovOperatorConfigSpec) { *out = make(PluginNameSlice, len(*in)) copy(*out, *in) } + if in.FeatureGates != nil { + in, out := &in.FeatureGates, &out.FeatureGates + *out = make(map[string]bool, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SriovOperatorConfigSpec. diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index 1f24bd8f5..74b7752ab 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -74,6 +74,11 @@ spec: provision switchdev-configuration.service and enable OpenvSwitch hw-offload on nodes. type: boolean + featureGates: + additionalProperties: + type: boolean + description: FeatureGates to enable experimental features + type: object logLevel: description: Flag to control the log verbose level of the operator. Set to '0' to show only the basic logs. And set to '2' to show all diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index 1f24bd8f5..74b7752ab 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -74,6 +74,11 @@ spec: provision switchdev-configuration.service and enable OpenvSwitch hw-offload on nodes. type: boolean + featureGates: + additionalProperties: + type: boolean + description: FeatureGates to enable experimental features + type: object logLevel: description: Flag to control the log verbose level of the operator. Set to '0' to show only the basic logs. And set to '2' to show all From 0aead49749b24a52c837553a3c14ca1f88228064 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Mon, 12 Feb 2024 11:30:57 +0200 Subject: [PATCH 2/2] Parallel NICs configuration --- bindata/manifests/daemon/daemonset.yaml | 3 + cmd/sriov-network-config-daemon/start.go | 12 +- controllers/sriovoperatorconfig_controller.go | 4 + pkg/consts/constants.go | 2 + pkg/host/internal/sriov/sriov.go | 254 ++++++++++++++---- pkg/vars/vars.go | 3 + 6 files changed, 219 insertions(+), 59 deletions(-) diff --git a/bindata/manifests/daemon/daemonset.yaml b/bindata/manifests/daemon/daemonset.yaml index 120d6069e..1fc4ce005 100644 --- a/bindata/manifests/daemon/daemonset.yaml +++ b/bindata/manifests/daemon/daemonset.yaml @@ -102,6 +102,9 @@ spec: {{- with index . "DisablePlugins" }} - --disable-plugins={{.}} {{- end }} + {{- if .ParallelNicConfig }} + - --parallel-nic-config + {{- end }} env: - name: NODE_NAME valueFrom: diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index 3ff895cae..e2d6cbdc2 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -78,10 +78,11 @@ var ( } startOpts struct { - kubeconfig string - nodeName string - systemd bool - disabledPlugins stringList + kubeconfig string + nodeName string + systemd bool + disabledPlugins stringList + parallelNicConfig bool } ) @@ -91,6 +92,7 @@ func init() { startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name daemon is managing") startCmd.PersistentFlags().BoolVar(&startOpts.systemd, "use-systemd-service", false, "use config daemon in systemd mode") startCmd.PersistentFlags().VarP(&startOpts.disabledPlugins, "disable-plugins", "", "comma-separated list of plugins to disable") + startCmd.PersistentFlags().BoolVar(&startOpts.parallelNicConfig, "parallel-nic-config", false, "perform NIC configuration in parallel") } func runStartCmd(cmd *cobra.Command, args []string) error { @@ -104,6 +106,8 @@ func runStartCmd(cmd *cobra.Command, args []string) error { vars.UsingSystemdMode = true } + vars.ParallelNicConfig = startOpts.parallelNicConfig + if startOpts.nodeName == "" { name, ok := os.LookupEnv("NODE_NAME") if !ok || name == "" { diff --git a/controllers/sriovoperatorconfig_controller.go b/controllers/sriovoperatorconfig_controller.go index 41eae2a3b..06e4e2a2c 100644 --- a/controllers/sriovoperatorconfig_controller.go +++ b/controllers/sriovoperatorconfig_controller.go @@ -187,6 +187,10 @@ func (r *SriovOperatorConfigReconciler) syncConfigDaemonSet(ctx context.Context, } else { data.Data["UsedSystemdMode"] = false } + data.Data["ParallelNicConfig"] = false + if parallelConfig, ok := dc.Spec.FeatureGates[consts.ParallelNicConfigFeatureGate]; ok { + data.Data["ParallelNicConfig"] = parallelConfig + } envCniBinPath := os.Getenv("SRIOV_CNI_BIN_PATH") if envCniBinPath == "" { diff --git a/pkg/consts/constants.go b/pkg/consts/constants.go index f1edc903f..a5a938769 100644 --- a/pkg/consts/constants.go +++ b/pkg/consts/constants.go @@ -110,6 +110,8 @@ const ( KernelArgPciRealloc = "pci=realloc" KernelArgIntelIommu = "intel_iommu=on" KernelArgIommuPt = "iommu=pt" + + ParallelNicConfigFeatureGate = "parallelNicConfig" ) const ( diff --git a/pkg/host/internal/sriov/sriov.go b/pkg/host/internal/sriov/sriov.go index 3d8d0a48c..a43e44b2c 100644 --- a/pkg/host/internal/sriov/sriov.go +++ b/pkg/host/internal/sriov/sriov.go @@ -26,6 +26,11 @@ import ( mlx "github.com/k8snetworkplumbingwg/sriov-network-operator/pkg/vendors/mellanox" ) +type interfaceToConfigure struct { + iface sriovnetworkv1.Interface + ifaceStatus sriovnetworkv1.InterfaceExt +} + type sriov struct { utilsHelper utils.CmdInterface kernelHelper types.KernelInterface @@ -450,13 +455,43 @@ func (s *sriov) ConfigSriovDevice(iface *sriovnetworkv1.Interface, ifaceStatus * return nil } -func (s *sriov) ConfigSriovInterfaces(storeManager store.ManagerInterface, - interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, pfsToConfig map[string]bool) error { +func (s *sriov) ConfigSriovInterfaces(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, pfsToConfig map[string]bool) error { if s.kernelHelper.IsKernelLockdownMode() && mlx.HasMellanoxInterfacesInSpec(ifaceStatuses, interfaces) { log.Log.Error(nil, "cannot use mellanox devices when in kernel lockdown mode") return fmt.Errorf("cannot use mellanox devices when in kernel lockdown mode") } + toBeConfigured, toBeResetted, err := s.getConfigureAndReset(storeManager, interfaces, ifaceStatuses, pfsToConfig) + if err != nil { + log.Log.Error(err, "cannot get a list of interfaces to configure") + return fmt.Errorf("cannot get a list of interfaces to configure") + } + + if vars.ParallelNicConfig { + err = s.configSriovInterfacesInParallel(storeManager, toBeConfigured) + } else { + err = s.configSriovInterfaces(storeManager, toBeConfigured) + } + if err != nil { + log.Log.Error(err, "cannot configure sriov interfaces") + return fmt.Errorf("cannot configure sriov interfaces") + } + + if vars.ParallelNicConfig { + err = s.resetSriovInterfacesInParallel(storeManager, toBeResetted) + } else { + err = s.resetSriovInterfaces(storeManager, toBeResetted) + } + if err != nil { + log.Log.Error(err, "cannot reset sriov interfaces") + return fmt.Errorf("cannot reset sriov interfaces") + } + return nil +} + +func (s *sriov) getConfigureAndReset(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, pfsToConfig map[string]bool) ([]interfaceToConfigure, []sriovnetworkv1.InterfaceExt, error) { + toBeConfigured := []interfaceToConfigure{} + toBeResetted := []sriovnetworkv1.InterfaceExt{} for _, ifaceStatus := range ifaceStatuses { configured := false for _, iface := range interfaces { @@ -467,76 +502,185 @@ func (s *sriov) ConfigSriovInterfaces(storeManager store.ManagerInterface, break } - if !sriovnetworkv1.NeedToUpdateSriov(&iface, &ifaceStatus) { - log.Log.V(2).Info("syncNodeState(): no need update interface", "address", iface.PciAddress) - - // Save the PF status to the host - err := storeManager.SaveLastPfAppliedStatus(&iface) - if err != nil { - log.Log.Error(err, "SyncNodeState(): failed to save PF applied config to host") - return err - } - - break - } - if err := s.ConfigSriovDevice(&iface, &ifaceStatus); err != nil { - log.Log.Error(err, "SyncNodeState(): fail to configure sriov interface. resetting interface.", "address", iface.PciAddress) - if iface.ExternallyManaged { - log.Log.Info("SyncNodeState(): skipping device reset as the nic is marked as externally created") - } else { - if resetErr := s.ResetSriovDevice(ifaceStatus); resetErr != nil { - log.Log.Error(resetErr, "SyncNodeState(): failed to reset on error SR-IOV interface") - } - } - return err - } - - // Save the PF status to the host - err := storeManager.SaveLastPfAppliedStatus(&iface) + skip, err := skipSriovConfig(&iface, &ifaceStatus, storeManager) if err != nil { - log.Log.Error(err, "SyncNodeState(): failed to save PF applied config to host") - return err + log.Log.Error(err, "getConfigureAndReset(): failed to check interface") + return nil, nil, err } - break + if skip { + break + } + iface := iface + ifaceStatus := ifaceStatus + toBeConfigured = append(toBeConfigured, interfaceToConfigure{iface: iface, ifaceStatus: ifaceStatus}) } } + if !configured && ifaceStatus.NumVfs > 0 { - if skip := pfsToConfig[ifaceStatus.PciAddress]; skip { - continue + if skip := pfsToConfig[ifaceStatus.PciAddress]; !skip { + toBeResetted = append(toBeResetted, ifaceStatus) } + } + } + return toBeConfigured, toBeResetted, nil +} - // load the PF info - pfStatus, exist, err := storeManager.LoadPfsStatus(ifaceStatus.PciAddress) - if err != nil { - log.Log.Error(err, "SyncNodeState(): failed to load info about PF status for device", - "address", ifaceStatus.PciAddress) - return err +func (s *sriov) configSriovInterfacesInParallel(storeManager store.ManagerInterface, interfaces []interfaceToConfigure) error { + log.Log.V(2).Info("configSriovInterfacesInParallel(): start sriov configuration") + + var result error + errChannel := make(chan error) + interfacesToConfigure := 0 + for ifaceIndex, iface := range interfaces { + interfacesToConfigure += 1 + go func(iface *interfaceToConfigure) { + var err error + if err = s.ConfigSriovDevice(&iface.iface, &iface.ifaceStatus); err != nil { + log.Log.Error(err, "configSriovInterfacesInParallel(): fail to configure sriov interface. resetting interface.", "address", iface.iface.PciAddress) + if iface.iface.ExternallyManaged { + log.Log.V(2).Info("configSriovInterfacesInParallel(): skipping device reset as the nic is marked as externally created") + } else { + if resetErr := s.ResetSriovDevice(iface.ifaceStatus); resetErr != nil { + log.Log.Error(resetErr, "configSriovInterfacesInParallel(): failed to reset on error SR-IOV interface") + err = resetErr + } + } } + errChannel <- err + }(&interfaces[ifaceIndex]) + // Save the PF status to the host + err := storeManager.SaveLastPfAppliedStatus(&iface.iface) + if err != nil { + log.Log.Error(err, "configSriovInterfacesInParallel(): failed to save PF applied config to host") + return err + } + } - if !exist { - log.Log.Info("SyncNodeState(): PF name with pci address has VFs configured but they weren't created by the sriov operator. Skipping the device reset", - "pf-name", ifaceStatus.Name, - "address", ifaceStatus.PciAddress) - continue + for i := 0; i < interfacesToConfigure; i++ { + errMsg := <-errChannel + result = errors.Join(result, errMsg) + } + if result != nil { + log.Log.Error(result, "configSriovInterfacesInParallel(): fail to configure sriov interfaces") + return result + } + log.Log.V(2).Info("configSriovInterfacesInParallel(): sriov configuration finished") + return nil +} + +func (s *sriov) resetSriovInterfacesInParallel(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.InterfaceExt) error { + var result error + errChannel := make(chan error, len(interfaces)) + interfacesToReset := 0 + for ifaceIndex := range interfaces { + interfacesToReset += 1 + go func(iface *sriovnetworkv1.InterfaceExt) { + var err error + if err = s.checkForConfigAndReset(*iface, storeManager); err != nil { + log.Log.Error(err, "resetSriovInterfacesInParallel(): fail to reset sriov interface. resetting interface.", "address", iface.PciAddress) } + errChannel <- err + }(&interfaces[ifaceIndex]) + } - if pfStatus.ExternallyManaged { - log.Log.Info("SyncNodeState(): PF name with pci address was externally created skipping the device reset", - "pf-name", ifaceStatus.Name, - "address", ifaceStatus.PciAddress) - continue + for i := 0; i < interfacesToReset; i++ { + errMsg := <-errChannel + result = errors.Join(result, errMsg) + } + if result != nil { + log.Log.Error(result, "resetSriovInterfacesInParallel(): fail to reset sriov interface") + return result + } + log.Log.V(2).Info("resetSriovInterfacesInParallel(): sriov reset finished") + + return nil +} + +func (s *sriov) configSriovInterfaces(storeManager store.ManagerInterface, interfaces []interfaceToConfigure) error { + log.Log.V(2).Info("configSriovInterfaces(): start sriov configuration") + for _, iface := range interfaces { + if err := s.ConfigSriovDevice(&iface.iface, &iface.ifaceStatus); err != nil { + log.Log.Error(err, "configSriovInterfaces(): fail to configure sriov interface. resetting interface.", "address", iface.iface.PciAddress) + if iface.iface.ExternallyManaged { + log.Log.V(2).Info("configSriovInterfaces(): skipping device reset as the nic is marked as externally created") } else { - err = s.udevHelper.RemoveUdevRule(ifaceStatus.PciAddress) - if err != nil { - return err + if resetErr := s.ResetSriovDevice(iface.ifaceStatus); resetErr != nil { + log.Log.Error(resetErr, "configSriovInterfaces(): failed to reset on error SR-IOV interface") } } + return err + } - if err = s.ResetSriovDevice(ifaceStatus); err != nil { - return err - } + // Save the PF status to the host + err := storeManager.SaveLastPfAppliedStatus(&iface.iface) + if err != nil { + log.Log.Error(err, "configSriovInterfaces(): failed to save PF applied config to host") + return err + } + } + log.Log.V(2).Info("configSriovInterfaces(): sriov configuration finished") + return nil +} + +func (s *sriov) resetSriovInterfaces(storeManager store.ManagerInterface, interfaces []sriovnetworkv1.InterfaceExt) error { + for _, iface := range interfaces { + if err := s.checkForConfigAndReset(iface, storeManager); err != nil { + log.Log.Error(err, "resetSriovInterfaces(): failed to reset sriov interface. resetting interface.", "address", iface.PciAddress) + return err } } + log.Log.V(2).Info("resetSriovInterfaces(): sriov reset finished") + return nil +} + +// / skipSriovConfig checks if we need to apply SR-IOV configuration specified specific interface +func skipSriovConfig(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt, storeManager store.ManagerInterface) (bool, error) { + if !sriovnetworkv1.NeedToUpdateSriov(iface, ifaceStatus) { + log.Log.V(2).Info("ConfigSriovInterfaces(): no need update interface", "address", iface.PciAddress) + + // Save the PF status to the host + err := storeManager.SaveLastPfAppliedStatus(iface) + if err != nil { + log.Log.Error(err, "ConfigSriovInterfaces(): failed to save PF applied status config to host") + return false, err + } + + return true, nil + } + return false, nil +} + +func (s *sriov) checkForConfigAndReset(ifaceStatus sriovnetworkv1.InterfaceExt, storeManager store.ManagerInterface) error { + // load the PF info + pfStatus, exist, err := storeManager.LoadPfsStatus(ifaceStatus.PciAddress) + if err != nil { + log.Log.Error(err, "checkForConfigAndReset(): failed to load info about PF status for device", + "address", ifaceStatus.PciAddress) + return err + } + + if !exist { + log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address has VFs configured but they weren't created by the sriov operator. Skipping the device reset", + "pf-name", ifaceStatus.Name, + "address", ifaceStatus.PciAddress) + return nil + } + + if pfStatus.ExternallyManaged { + log.Log.V(2).Info("checkForConfigAndReset(): PF name with pci address was externally created skipping the device reset", + "pf-name", ifaceStatus.Name, + "address", ifaceStatus.PciAddress) + return nil + } + err = s.udevHelper.RemoveUdevRule(ifaceStatus.PciAddress) + if err != nil { + return err + } + + if err = s.ResetSriovDevice(ifaceStatus); err != nil { + return err + } + return nil } diff --git a/pkg/vars/vars.go b/pkg/vars/vars.go index 555d4389e..7fd976e5a 100644 --- a/pkg/vars/vars.go +++ b/pkg/vars/vars.go @@ -48,6 +48,9 @@ var ( // UsingSystemdMode global variable to mark the config-daemon is running on systemd mode UsingSystemdMode = false + // ParallelNicConfig global variable to perform NIC configuration in parallel + ParallelNicConfig = false + // FilesystemRoot used by test to mock interactions with filesystem FilesystemRoot = ""