diff --git a/pkg/daemon/daemon.go b/pkg/daemon/daemon.go
index 727ffe6420..3803f3de80 100644
--- a/pkg/daemon/daemon.go
+++ b/pkg/daemon/daemon.go
@@ -436,6 +436,12 @@ func (dn *Daemon) nodeStateSyncHandler() error {
 	}
 
 	if latestState.GetGeneration() == 1 && len(latestState.Spec.Interfaces) == 0 {
+		err = utils.ClearPCIAddressFolder()
+		if err != nil {
+			glog.Errorf("failed to clear the PCI address configuration: %v", err)
+			return err
+		}
+
 		glog.V(0).Infof("nodeStateSyncHandler(): Name: %s, Interface policy spec not yet set by controller", latestState.Name)
 		if latestState.Status.SyncStatus != "Succeeded" {
 			dn.refreshCh <- Message{
diff --git a/pkg/plugins/generic/generic_plugin.go b/pkg/plugins/generic/generic_plugin.go
index a2a7e1c00b..1f3cdd397f 100644
--- a/pkg/plugins/generic/generic_plugin.go
+++ b/pkg/plugins/generic/generic_plugin.go
@@ -198,6 +198,27 @@ func needDrainNode(desired sriovnetworkv1.Interfaces, current sriovnetworkv1.Int
 			}
 		}
 		if !configured && ifaceStatus.NumVfs > 0 {
+			// load the PF info
+			pfStatus, exist, err := utils.LoadPfsStatus(ifaceStatus.PciAddress, true)
+			if err != nil {
+				glog.Errorf("generic-plugin needDrainNode(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err)
+				continue
+			}
+
+			if !exist {
+				glog.Infof("generic-plugin needDrainNode(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping drain",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
+			if pfStatus.ExternallyCreated {
+				glog.Infof("generic-plugin needDrainNode(): PF name %s with pci address %s was externally created. Skipping drain",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
 			glog.V(2).Infof("generic-plugin needDrainNode(): need drain, %v needs to be reset", ifaceStatus)
 			needDrain = true
 			return
diff --git a/pkg/utils/host.go b/pkg/utils/host.go
new file mode 100644
index 0000000000..7032b818f8
--- /dev/null
+++ b/pkg/utils/host.go
@@ -0,0 +1,151 @@
+package utils
+
+import (
+	"encoding/json"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/golang/glog"
+
+	sriovnetworkv1 "github.com/k8snetworkplumbingwg/sriov-network-operator/api/v1"
+)
+
+// Paths of the operator configuration folder and of the folder holding one PF status file per PCI address.
+// The Host* variants carry the "/host" prefix and are used by code that does not chroot first
+// (see ClearPCIAddressFolder); the unprefixed ones are used by SaveLastPfAppliedStatus and LoadPfsStatus.
+const (
+	SriovConfBasePath     = "/etc/sriov-operator"
+	PfAppliedConfig       = SriovConfBasePath + "/pci"
+	HostSriovConfBasePath = "/host" + SriovConfBasePath
+	HostPfAppliedConfig   = HostSriovConfBasePath + "/pci"
+)
+
+// PfStatus is the last configuration applied to a PF, stored on disk as a json file.
+type PfStatus struct {
+	NumVfs            int    `json:"numVfs"`
+	Mtu               int    `json:"mtu"`
+	LinkType          string `json:"linkType"`
+	EswitchMode       string `json:"eSwitchMode"`
+	ExternallyCreated bool   `json:"externallyCreated"`
+}
+
+// CreateOperatorConfigFolderIfNeeded creates the operator base folder on the host together with the pci folder
+// used to save the PF status objects as json files. Folders that already exist are left untouched.
+// It uses the unprefixed paths; SaveLastPfAppliedStatus, which calls it, is documented to run after a chroot.
+func CreateOperatorConfigFolderIfNeeded() error {
+	_, err := os.Stat(SriovConfBasePath)
+	if err != nil {
+		if os.IsNotExist(err) {
+			// the permission bits must be explicit: os.ModeDir alone carries none and creates a 0000 folder
+			err = os.Mkdir(SriovConfBasePath, 0755)
+			if err != nil {
+				return fmt.Errorf("failed to create the sriov config folder on host in path %s: %v", SriovConfBasePath, err)
+			}
+		} else {
+			return fmt.Errorf("failed to check if the sriov config folder on host in path %s exist: %v", SriovConfBasePath, err)
+		}
+	}
+
+	_, err = os.Stat(PfAppliedConfig)
+	if err != nil {
+		if os.IsNotExist(err) {
+			err = os.Mkdir(PfAppliedConfig, 0755)
+			if err != nil {
+				return fmt.Errorf("failed to create the pci folder on host in path %s: %v", PfAppliedConfig, err)
+			}
+		} else {
+			return fmt.Errorf("failed to check if the pci folder on host in path %s exist: %v", PfAppliedConfig, err)
+		}
+	}
+
+	return nil
+}
+
+// ClearPCIAddressFolder removes every saved PF status by deleting the host pci folder and creating it again empty.
+// It returns nil without doing anything when the folder does not exist.
+func ClearPCIAddressFolder() error {
+	_, err := os.Stat(HostPfAppliedConfig)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil
+		}
+		return fmt.Errorf("failed to check the pci address folder path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	err = os.RemoveAll(HostPfAppliedConfig)
+	if err != nil {
+		return fmt.Errorf("failed to remove the PCI address folder on path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	// the permission bits must be explicit: os.ModeDir alone carries none and creates a 0000 folder
+	err = os.Mkdir(HostPfAppliedConfig, 0755)
+	if err != nil {
+		return fmt.Errorf("failed to create the pci folder on host in path %s: %v", HostPfAppliedConfig, err)
+	}
+
+	return nil
+}
+
+// CreatePfAppliedStatusFromSpec builds the PfStatus to persist from the given interface spec.
+func CreatePfAppliedStatusFromSpec(p *sriovnetworkv1.Interface) *PfStatus {
+	return &PfStatus{
+		ExternallyCreated: p.ExternallyCreated,
+		NumVfs:            p.NumVfs,
+		EswitchMode:       p.EswitchMode,
+		Mtu:               p.Mtu,
+		LinkType:          p.LinkType,
+	}
+}
+
+// SaveLastPfAppliedStatus will save the PF object as a json into /etc/sriov-operator/pci/,
+// in a file named after the PCI address.
+// this function must be called after running the chroot function
+func SaveLastPfAppliedStatus(pciAddress string, pfStatus *PfStatus) error {
+	if err := CreateOperatorConfigFolderIfNeeded(); err != nil {
+		return err
+	}
+
+	data, err := json.Marshal(pfStatus)
+	if err != nil {
+		glog.Errorf("failed to marshal PF status %+v: %v", *pfStatus, err)
+		return err
+	}
+
+	path := filepath.Join(PfAppliedConfig, pciAddress)
+	err = os.WriteFile(path, data, 0644)
+	return err
+}
+
+// LoadPfsStatus converts the json stored in /etc/sriov-operator/pci/, in the file named after the
+// PCI address, to a PfStatus. When chroot is true it calls Chroot("/host") first and defers the returned exit function.
+// It returns false and a nil error if the file doesn't exist; if the file cannot be read or parsed
+// the error is returned together with a nil status and false.
+func LoadPfsStatus(pciAddress string, chroot bool) (*PfStatus, bool, error) {
+	if chroot {
+		exit, err := Chroot("/host")
+		if err != nil {
+			return nil, false, err
+		}
+		defer exit()
+	}
+
+	pfStatus := &PfStatus{}
+	path := filepath.Join(PfAppliedConfig, pciAddress)
+	data, err := os.ReadFile(path)
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, false, nil
+		}
+		glog.Errorf("failed to read PF status from path %s: %v", path, err)
+		return nil, false, err
+	}
+
+	err = json.Unmarshal(data, pfStatus)
+	if err != nil {
+		glog.Errorf("failed to unmarshal PF status %s: %v", data, err)
+		return nil, false, err
+	}
+
+	return pfStatus, true, nil
+}
diff --git a/pkg/utils/utils.go b/pkg/utils/utils.go
index 30601847c5..429252c739 100644
--- a/pkg/utils/utils.go
+++ b/pkg/utils/utils.go
@@ -121,6 +121,15 @@ func DiscoverSriovDevices(withUnsupported bool) ([]sriovnetworkv1.InterfaceExt,
 		}
 		iface.LinkType = getLinkType(iface)
 
+		pfStatus, exist, err := LoadPfsStatus(iface.PciAddress, true)
+		if err != nil {
+			glog.Warningf("DiscoverSriovDevices(): failed to load PF status from disk: %v", err)
+		}
+
+		if exist {
+			iface.ExternallyCreated = pfStatus.ExternallyCreated
+		}
+
 		if dputils.IsSriovPF(device.Address) {
 			iface.TotalVfs = dputils.GetSriovVFcapacity(device.Address)
 			iface.NumVfs = dputils.GetVFconfigured(device.Address)
@@ -164,15 +173,34 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig m
 
 				if !NeedUpdate(&iface, &ifaceStatus) {
 					glog.V(2).Infof("syncNodeState(): no need update interface %s", iface.PciAddress)
+
+					// Save the PF status to the host
+					err = SaveLastPfAppliedStatus(iface.PciAddress, CreatePfAppliedStatusFromSpec(&iface))
+					if err != nil {
+						glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err)
+						return err
+					}
+
 					break
 				}
 				if err = configSriovDevice(&iface, &ifaceStatus); err != nil {
 					glog.Errorf("SyncNodeState(): fail to configure sriov interface %s: %v. resetting interface.", iface.PciAddress, err)
-					if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil {
-						glog.Errorf("SyncNodeState(): fail to reset on error SR-IOV interface: %s", resetErr)
+					if iface.ExternallyCreated {
+						glog.Infof("SyncNodeState(): skipping device reset as the nic is marked as externally created")
+					} else {
+						if resetErr := resetSriovDevice(ifaceStatus); resetErr != nil {
+							glog.Errorf("SyncNodeState(): failed to reset on error SR-IOV interface: %s", resetErr)
+						}
 					}
 					return err
 				}
+
+				// Save the PF status to the host
+				err = SaveLastPfAppliedStatus(iface.PciAddress, CreatePfAppliedStatusFromSpec(&iface))
+				if err != nil {
+					glog.Errorf("SyncNodeState(): failed to save PF applied config to host: %v", err)
+					return err
+				}
 				break
 			}
 		}
@@ -181,6 +209,27 @@ func SyncNodeState(newState *sriovnetworkv1.SriovNetworkNodeState, pfsToConfig m
 				continue
 			}
 
+			// load the PF info
+			pfStatus, exist, err := LoadPfsStatus(ifaceStatus.PciAddress, false)
+			if err != nil {
+				glog.Errorf("SyncNodeState(): failed to load info about PF status for pci address %s: %v", ifaceStatus.PciAddress, err)
+				return err
+			}
+
+			if !exist {
+				glog.Infof("SyncNodeState(): PF name %s with pci address %s has VFs configured but they weren't created by the sriov operator. Skipping the device reset",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
+			if pfStatus.ExternallyCreated {
+				glog.Infof("SyncNodeState(): PF name %s with pci address %s was externally created skipping the device reset",
+					ifaceStatus.Name,
+					ifaceStatus.PciAddress)
+				continue
+			}
+
 			if err = resetSriovDevice(ifaceStatus); err != nil {
 				return err
 			}
@@ -269,6 +318,12 @@ func NeedUpdate(iface *sriovnetworkv1.Interface, ifaceStatus *sriovnetworkv1.Int
 							glog.V(2).Infof("NeedUpdate(): VF %d MTU needs update, desired=%d, current=%d", vf.VfID, group.Mtu, vf.Mtu)
 							return true
 						}
+
+						// this is needed to be sure the admin mac address is configured as expected
+						if iface.ExternallyCreated {
+							glog.V(2).Infof("NeedUpdate(): need to update the device as it's externally manage for pci address %s", ifaceStatus.PciAddress)
+							return true
+						}
 					}
 					break
 				}