From 42bf306d31ce21dac7a75a79b3c9da1378322694 Mon Sep 17 00:00:00 2001 From: Ivan Kolodiazhnyi Date: Tue, 22 Aug 2023 11:30:04 +0300 Subject: [PATCH] Parallel NICs configuration --- api/v1/sriovoperatorconfig_types.go | 2 + cmd/sriov-network-config-daemon/service.go | 4 +- cmd/sriov-network-config-daemon/start.go | 9 ++- ...ork.openshift.io_sriovoperatorconfigs.yaml | 4 ++ ...ork.openshift.io_sriovoperatorconfigs.yaml | 4 ++ pkg/daemon/daemon.go | 6 +- pkg/daemon/daemon_test.go | 1 + pkg/daemon/plugin.go | 4 +- pkg/plugins/generic/generic_plugin.go | 30 +++++---- pkg/plugins/generic/generic_plugin_test.go | 2 +- pkg/utils/utils.go | 67 ++++++++++++++++++- 11 files changed, 108 insertions(+), 25 deletions(-) diff --git a/api/v1/sriovoperatorconfig_types.go b/api/v1/sriovoperatorconfig_types.go index fc4fe4b64..f38822a6d 100644 --- a/api/v1/sriovoperatorconfig_types.go +++ b/api/v1/sriovoperatorconfig_types.go @@ -43,6 +43,8 @@ type SriovOperatorConfigSpec struct { // Default mode: daemon // +kubebuilder:validation:Enum=daemon;systemd ConfigurationMode ConfigurationModeType `json:"configurationMode,omitempty"` + // Flag to enable NICs configuration in a parallel on the same node + ParallelNicConfig bool `json:"parallelNicConfig,omitempty"` } // SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig diff --git a/cmd/sriov-network-config-daemon/service.go b/cmd/sriov-network-config-daemon/service.go index b300254ac..e27aef026 100644 --- a/cmd/sriov-network-config-daemon/service.go +++ b/cmd/sriov-network-config-daemon/service.go @@ -113,9 +113,9 @@ func runServiceCmd(cmd *cobra.Command, args []string) error { glog.Errorf("sriov-config-service: failed to discover sriov devices on the host: %v", err) return fmt.Errorf("sriov-config-service: failed to discover sriov devices on the host: %v", err) } - + // TODO(e0ne): read ParallelNicConfig from SriovOperatorConfig CR // Create the generic plugin - configPlugin, err = generic.NewGenericPlugin(true) + 
configPlugin, err = generic.NewGenericPlugin(true, false) if err != nil { glog.Errorf("sriov-config-service: failed to create generic plugin %v", err) return fmt.Errorf("sriov-config-service failed to create generic plugin %v", err) diff --git a/cmd/sriov-network-config-daemon/start.go b/cmd/sriov-network-config-daemon/start.go index b9ee61ac5..4e84fa2ed 100644 --- a/cmd/sriov-network-config-daemon/start.go +++ b/cmd/sriov-network-config-daemon/start.go @@ -52,9 +52,10 @@ var ( } startOpts struct { - kubeconfig string - nodeName string - systemd bool + kubeconfig string + nodeName string + systemd bool + parallelNicConfig bool } ) @@ -63,6 +64,7 @@ func init() { startCmd.PersistentFlags().StringVar(&startOpts.kubeconfig, "kubeconfig", "", "Kubeconfig file to access a remote cluster (testing only)") startCmd.PersistentFlags().StringVar(&startOpts.nodeName, "node-name", "", "kubernetes node name daemon is managing") startCmd.PersistentFlags().BoolVar(&startOpts.systemd, "use-systemd-service", false, "use config daemon in systemd mode") + startCmd.PersistentFlags().BoolVar(&startOpts.parallelNicConfig, "parallel-nic-config", false, "NICs configuration in a parallel on the same node") } func runStartCmd(cmd *cobra.Command, args []string) { @@ -208,6 +210,7 @@ func runStartCmd(cmd *cobra.Command, args []string) { platformType, startOpts.systemd, devMode, + startOpts.parallelNicConfig, ).Run(stopCh, exitCh) if err != nil { glog.Errorf("failed to run daemon: %v", err) diff --git a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index f1666e78e..0daad59f2 100644 --- a/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/config/crd/bases/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -72,6 +72,10 @@ spec: maximum: 2 minimum: 0 type: integer + parallelNicConfig: + description: Flag to enable NICs configuration in a parallel on the + same node + type: 
boolean type: object status: description: SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig diff --git a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml index f1666e78e..0daad59f2 100644 --- a/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml +++ b/deployment/sriov-network-operator/crds/sriovnetwork.openshift.io_sriovoperatorconfigs.yaml @@ -72,6 +72,10 @@ spec: maximum: 2 minimum: 0 type: integer + parallelNicConfig: + description: Flag to enable NICs configuration in a parallel on the + same node + type: boolean type: object status: description: SriovOperatorConfigStatus defines the observed state of SriovOperatorConfig diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go index c7087a75b..85d7244d4 100644 --- a/pkg/daemon/daemon.go +++ b/pkg/daemon/daemon.go @@ -69,6 +69,8 @@ type Daemon struct { useSystemdService bool + parallelNicConfig bool + devMode bool client snclientset.Interface @@ -149,11 +151,13 @@ func New( platformType utils.PlatformType, useSystemdService bool, devMode bool, + parallelNicConfig bool, ) *Daemon { return &Daemon{ name: nodeName, platform: platformType, useSystemdService: useSystemdService, + parallelNicConfig: parallelNicConfig, devMode: devMode, client: client, kubeClient: kubeClient, @@ -517,7 +521,7 @@ func (dn *Daemon) nodeStateSyncHandler() error { // load plugins if it has not loaded if len(dn.enabledPlugins) == 0 { - dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, latestState) + dn.enabledPlugins, err = enablePlugins(dn.platform, dn.useSystemdService, dn.parallelNicConfig, latestState) if err != nil { glog.Errorf("nodeStateSyncHandler(): failed to enable vendor plugins error: %v", err) return err diff --git a/pkg/daemon/daemon_test.go b/pkg/daemon/daemon_test.go index 1aac8d43d..6ad531d22 100644 --- 
a/pkg/daemon/daemon_test.go +++ b/pkg/daemon/daemon_test.go @@ -115,6 +115,7 @@ var _ = Describe("Config Daemon", func() { utils.Baremetal, false, false, + false, ) sut.enabledPlugins = map[string]plugin.VendorPlugin{generic.PluginName: &fake.FakePlugin{}} diff --git a/pkg/daemon/plugin.go b/pkg/daemon/plugin.go index 9639db88e..63023eb7e 100644 --- a/pkg/daemon/plugin.go +++ b/pkg/daemon/plugin.go @@ -28,7 +28,7 @@ var ( K8sPlugin = k8splugin.NewK8sPlugin ) -func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *sriovnetworkv1.SriovNetworkNodeState) (map[string]plugin.VendorPlugin, error) { +func enablePlugins(platform utils.PlatformType, useSystemdService, parallelNicConfig bool, ns *sriovnetworkv1.SriovNetworkNodeState) (map[string]plugin.VendorPlugin, error) { glog.Infof("enableVendorPlugins(): enabling plugins") enabledPlugins := map[string]plugin.VendorPlugin{} @@ -54,7 +54,7 @@ func enablePlugins(platform utils.PlatformType, useSystemdService bool, ns *srio } enabledPlugins[k8sPlugin.Name()] = k8sPlugin } - genericPlugin, err := GenericPlugin(false) + genericPlugin, err := GenericPlugin(false, parallelNicConfig) if err != nil { glog.Errorf("enableVendorPlugins(): failed to load the generic plugin error: %v", err) return nil, err diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go index bc5b1129e..99ce4d055 100644 --- a/pkg/plugins/generic/generic_plugin.go +++ b/pkg/plugins/generic/generic_plugin.go @@ -47,19 +47,20 @@ type DriverState struct { type DriverStateMapType map[uint]*DriverState type GenericPlugin struct { - PluginName string - SpecVersion string - DesireState *sriovnetworkv1.SriovNetworkNodeState - LastState *sriovnetworkv1.SriovNetworkNodeState - DriverStateMap DriverStateMapType - RunningOnHost bool - HostManager host.HostManagerInterface + PluginName string + SpecVersion string + DesireState *sriovnetworkv1.SriovNetworkNodeState + LastState *sriovnetworkv1.SriovNetworkNodeState + 
DriverStateMap DriverStateMapType + RunningOnHost bool + ParallelNicConfig bool + HostManager host.HostManagerInterface } const scriptsPath = "bindata/scripts/enable-kargs.sh" // Initialize our plugin and set up initial values -func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { +func NewGenericPlugin(runningOnHost, parallelNicConfig bool) (plugin.VendorPlugin, error) { driverStateMap := make(map[uint]*DriverState) driverStateMap[Vfio] = &DriverState{ DriverName: vfioPciDriver, @@ -84,11 +85,12 @@ func NewGenericPlugin(runningOnHost bool) (plugin.VendorPlugin, error) { } return &GenericPlugin{ - PluginName: PluginName, - SpecVersion: "1.0", - DriverStateMap: driverStateMap, - RunningOnHost: runningOnHost, - HostManager: host.NewHostManager(runningOnHost), + PluginName: PluginName, + SpecVersion: "1.0", + DriverStateMap: driverStateMap, + RunningOnHost: runningOnHost, + ParallelNicConfig: parallelNicConfig, + HostManager: host.NewHostManager(runningOnHost), }, nil } @@ -166,7 +168,7 @@ func (p *GenericPlugin) Apply() error { defer exit() } - if err := utils.SyncNodeState(p.DesireState, pfsToSkip); err != nil { + if err := utils.SyncNodeState(p.DesireState, pfsToSkip, p.ParallelNicConfig); err != nil { return err } p.LastState = &sriovnetworkv1.SriovNetworkNodeState{} diff --git a/pkg/plugins/generic/generic_plugin_test.go b/pkg/plugins/generic/generic_plugin_test.go index dc8201448..28c15e915 100644 --- a/pkg/plugins/generic/generic_plugin_test.go +++ b/pkg/plugins/generic/generic_plugin_test.go @@ -19,7 +19,7 @@ var _ = Describe("Generic plugin", func() { var genericPlugin plugin.VendorPlugin var err error BeforeEach(func() { - genericPlugin, err = NewGenericPlugin(false) + genericPlugin, err = NewGenericPlugin(false, false) Expect(err).ToNot(HaveOccurred()) }) diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go index 231e2ab80..aca7bd03c 100644 --- a/pkg/utils/utils.go +++ b/pkg/utils/utils.go @@ -12,6 +12,7 @@ import ( "regexp" "strconv" 
"strings" + "sync" "syscall" "time" @@ -147,8 +148,69 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt, } // SyncNodeState Attempt to update the node state to match the desired state -func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig map[string]bool) error { - return ConfigSriovInterfaces(newState.Spec.Interfaces, newState.Status.Interfaces, pfsToConfig) +func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig map[string]bool, parallelNicConfig bool) error { + if !parallelNicConfig { + return ConfigSriovInterfaces(newState.Spec.Interfaces, newState.Status.Interfaces, pfsToConfig) + } + return ConfigSriovInterfacesInParallel(newState.Spec.Interfaces, newState.Status.Interfaces, pfsToConfig) +} + +func ConfigSriovInterfacesInParallel(interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, pfsToConfig map[string]bool) error { + if IsKernelLockdownMode(true) && hasMellanoxInterfacesInSpec(ifaceStatuses, interfaces) { + glog.Warningf("cannot use mellanox devices when in kernel lockdown mode") + return fmt.Errorf("cannot use mellanox devices when in kernel lockdown mode") + } + var err error + result := make(chan error) + defer close(result) + wg := sync.WaitGroup{} + wg.Add(len(ifaceStatuses)) + for _, ifaceStatus := range ifaceStatuses { + configured := false + for _, iface := range interfaces { + if iface.PciAddress == ifaceStatus.PciAddress { + configured = true + + if skip := pfsToConfig[iface.PciAddress]; skip { + break + } + + if !NeedUpdate(&iface, &ifaceStatus) { + glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress) + break + } + + wg.Add(1) + go func(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.InterfaceExt, result chan error) { + err := configSriovDevice(iface, ifaceStatus) + defer wg.Done() + result <- err + }(&iface, &ifaceStatus, result) + + //if err = configSriovDevice(&iface, &ifaceStatus); err != 
nil { + // glog.Errorf("SyncNodeState(): fail to configure sriov interface %s: %v. resetting interface.", iface.PciAddress, err) + // if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil { + // glog.Errorf("SyncNodeState(): fail to reset on error SR-IOV interface: %s", resetErr) + // } + // return err + //} + break + } + } + if !configured && ifaceStatus.NumVfs > 0 { + if skip := pfsToConfig[ifaceStatus.PciAddress]; skip { + continue + } + + if err = resetSriovDevice(ifaceStatus); err != nil { + return err + } + } + } + + wg.Wait() + + return nil } func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses []sriovnetworkv1.InterfaceExt, pfsToConfig map[string]bool) error { @@ -171,6 +233,7 @@ func ConfigSriovInterfaces(interfaces []sriovnetworkv1.Interface, ifaceStatuses glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress) break } + if err = configSriovDevice(&iface, &ifaceStatus); err != nil { glog.Errorf("SyncNodeState(): fail to configure sriov interface %s: %v. resetting interface.", iface.PciAddress, err) if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil {