Skip to content

Commit

Permalink
VRF manager fixup and address comments
Browse files Browse the repository at this point in the history
Signed-off-by: Tim Rozet <[email protected]>
  • Loading branch information
trozet committed Aug 7, 2024
1 parent ea7ae5d commit d3dafad
Show file tree
Hide file tree
Showing 6 changed files with 65 additions and 119 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(validNetworks ..
if !util.IsNetworkSegmentationSupportEnabled() {
return nil
}
validVrfDevices := make(sets.Set[string])
validVRFDevices := make(sets.Set[string])
for _, network := range validNetworks {
if !network.IsPrimaryNetwork() {
continue
Expand All @@ -68,9 +68,9 @@ func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(validNetworks ..
klog.Errorf("Failed to get network identifier for network %s, error: %s", network.GetNetworkName(), err)
continue
}
validVrfDevices.Insert(util.GetVRFDeviceNameForUDN(networkID))
validVRFDevices.Insert(util.GetVRFDeviceNameForUDN(networkID))
}
return ncm.vrfManager.Repair(validVrfDevices)
return ncm.vrfManager.Repair(validVRFDevices)
}

func (ncm *nodeNetworkControllerManager) getNetworkID(network util.BasicNetInfo) (int, error) {
Expand Down Expand Up @@ -183,19 +183,18 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
if ncm.nadController != nil {
err = ncm.nadController.Start()
if err != nil {
return fmt.Errorf("failed to start default node network controller: %v", err)
return fmt.Errorf("failed to start NAD controller: %w", err)
}
}

if ncm.vrfManager != nil {
// Let's create VRF manager that will manage VRFs for all UDNs
err = ncm.vrfManager.Run(ncm.stopChan, ncm.wg)
if err != nil {
err = fmt.Errorf("failed to run VRF Manager: %v", err)
return fmt.Errorf("failed to run VRF Manager: %w", err)
}
}

return err
return nil
}

// Stop gracefully stops all managed controllers
Expand Down
5 changes: 1 addition & 4 deletions go-controller/pkg/node/gateway_udn.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"strings"

v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
listers "k8s.io/client-go/listers/core/v1"
"k8s.io/klog/v2"
utilnet "k8s.io/utils/net"
Expand Down Expand Up @@ -58,9 +57,7 @@ func (udng *UserDefinedNetworkGateway) AddNetwork() error {
}
vrfDeviceName := util.GetVRFDeviceNameForUDN(udng.networkID)
vrfTableId := util.CalculateRouteTableID(mplink.Attrs().Index)
slaveInterfaces := make(sets.Set[string])
slaveInterfaces.Insert(mplink.Attrs().Name)
err = udng.vrfManager.AddVRF(vrfDeviceName, slaveInterfaces, uint32(vrfTableId))
err = udng.vrfManager.AddVRF(vrfDeviceName, mplink.Attrs().Name, uint32(vrfTableId))
if err != nil {
return fmt.Errorf("could not add VRF %d for network %s, err: %v", vrfTableId, udng.GetNetworkName(), err)
}
Expand Down
104 changes: 50 additions & 54 deletions go-controller/pkg/node/vrfmanager/vrf_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,27 @@ package vrfmanager

import (
"fmt"

"strings"
"sync"
"time"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
utilerrors "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util/errors"

"github.com/vishvananda/netlink"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/klog/v2"
)

type vrf struct {
name string
table uint32
enslavedInterfaces sets.Set[string]
name string
table uint32
// managedSlave is the desired netlink interface who's master will be this VRF.
// It cannot be changed after VRF creation.
managedSlave string
}

type Controller struct {
Expand Down Expand Up @@ -116,6 +121,7 @@ func (vrfm *Controller) runInternal(stopChan <-chan struct{}, doneWg *sync.WaitG
return nil
}

// reconcile synchronizes all desired VRFs and removal of stale objects
func (vrfm *Controller) reconcile() error {
vrfm.mu.Lock()
defer vrfm.mu.Unlock()
Expand All @@ -124,12 +130,25 @@ func (vrfm *Controller) reconcile() error {
klog.V(5).Infof("VRF Manager: reconciling VRFs took %v", time.Since(start))
}()

var errorAggregate []error
validVRFDevices := make(sets.Set[string])
for _, vrf := range vrfm.vrfs {
validVRFDevices.Insert(vrf.name)
err := vrfm.sync(vrf)
if err != nil {
klog.Errorf("VRF Manager: error syncing VRF device %s during reconcile, err: %v", vrf.name, err)
errorAggregate = append(errorAggregate, fmt.Errorf("error syncing VRF %s: %v", vrf.name, err))
}
}

// clean up anything stale
if err := vrfm.repair(validVRFDevices); err != nil {
errorAggregate = append(errorAggregate, fmt.Errorf("error repairing VRFs: %v", err))
}

if len(errorAggregate) > 0 {
return utilerrors.Join(errorAggregate...)
}

return nil
}

Expand All @@ -143,6 +162,8 @@ func (vrfm *Controller) syncVRF(link netlink.Link) error {
return vrfm.sync(vrf)
}

// sync ensures that the netlink VRF device exists, and the managedSlave is enslaved to it.
// It does not handle removal of the VRF or managedSlave, other than if it detects a conflict while adding.
func (vrfm *Controller) sync(vrf vrf) error {
vrfLink, err := util.GetNetLinkOps().LinkByName(vrf.name)
var mustRecreate bool
Expand All @@ -153,7 +174,7 @@ func (vrfm *Controller) sync(vrf vrf) error {
vrfDev, ok := vrfLink.(*netlink.Vrf)
if ok && vrfDev.Table != vrf.table {
klog.Warningf("Found a conflict with existing VRF device table id for VRF device %s, recreating it", vrf.name)
err = vrfm.deleteVRF(vrf)
err = vrfm.deleteVRF(vrfLink)
if err != nil {
return fmt.Errorf("failed to delete existing VRF device %s to recreate, err: %w", vrf.name, err)
}
Expand Down Expand Up @@ -181,35 +202,24 @@ func (vrfm *Controller) sync(vrf vrf) error {
return fmt.Errorf("failed to get VRF device %s operationally up, err: %v", vrf.name, err)
}
}
existingEnslaves, err := getSlaveInterfaceNamesForVRF(vrfLink)
if err != nil {
return err
}
if existingEnslaves.Equal(vrf.enslavedInterfaces) {
return nil
}
for _, iface := range vrf.enslavedInterfaces.UnsortedList() {
if !existingEnslaves.Has(iface) {
err = enslaveInterfaceToVRF(vrf.name, iface)
if err != nil {
return fmt.Errorf("failed to enslave inteface %s into VRF device: %s, err: %v", iface, vrf.name, err)
}
if len(vrf.managedSlave) > 0 {
existingSlaves, err := getSlaveInterfaceNamesForVRF(vrfLink)
if err != nil {
return fmt.Errorf("failed to get existing slaves for VRF device %s, err: %v", vrfLink.Attrs().Name, err)
}
}
for _, iface := range existingEnslaves.UnsortedList() {
if !vrf.enslavedInterfaces.Has(iface) {
err = removeInterfaceFromVRF(vrf.name, iface)
if err != nil {
return fmt.Errorf("failed to remove inteface %s from VRF device: %s, err: %v", iface, vrf.name, err)
if !existingSlaves.Has(vrf.managedSlave) {
if err = enslaveInterfaceToVRF(vrf.name, vrf.managedSlave); err != nil {
return fmt.Errorf("failed to enslave interface %s into VRF device: %s, err: %v", vrf.managedSlave, vrf.name, err)
}
}
}

vrfm.vrfs[vrfLink.Attrs().Index] = vrf
return nil
}

// AddVRF adds a VRF device into the node.
func (vrfm *Controller) AddVRF(name string, slaveInterfaces sets.Set[string], table uint32) error {
func (vrfm *Controller) AddVRF(name string, slaveInterface string, table uint32) error {
vrfm.mu.Lock()
defer vrfm.mu.Unlock()

Expand All @@ -225,19 +235,19 @@ func (vrfm *Controller) AddVRF(name string, slaveInterfaces sets.Set[string], ta
vrfDev, ok = vrfm.vrfs[vrfLink.Attrs().Index]
if ok {
klog.V(5).Infof("VRF Manager: VRF %s already found in the cache", name)
if !vrfDev.enslavedInterfaces.Equal(slaveInterfaces) {
return fmt.Errorf("VRF Manager: enslave interfaces mismatch for VRF device %s", name)
if vrfDev.managedSlave != slaveInterface {
return fmt.Errorf("VRF Manager: slave interface mismatch for VRF device %s", name)
}
if vrfDev.table != table {
return fmt.Errorf("VRF Manager: table id mismatch for VRF device %s", name)
}
} else {
vrfDev = vrf{name, table, slaveInterfaces}
vrfDev = vrf{name, table, slaveInterface}
}
}

if err != nil && util.GetNetLinkOps().IsLinkNotFoundError(err) {
vrfDev = vrf{name, table, slaveInterfaces}
vrfDev = vrf{name, table, slaveInterface}
} else if err != nil {
return fmt.Errorf("failed to retrieve VRF device %s, err: %v", name, err)
}
Expand All @@ -249,20 +259,25 @@ func (vrfm *Controller) AddVRF(name string, slaveInterfaces sets.Set[string], ta
// device(s) for which DeleteVRF is never invoked.
// Assumptions: 1) The validVRFs list must contain device for which AddVRF
// is already invoked. 2) The device name(s) in validVRFs are suffixed
// with -vrf.
// with "-vrf" and prefixed with "mp".
func (vrfm *Controller) Repair(validVRFs sets.Set[string]) error {
vrfm.mu.Lock()
defer vrfm.mu.Unlock()

return vrfm.repair(validVRFs)
}

func (vrfm *Controller) repair(validVRFs sets.Set[string]) error {
links, err := util.GetNetLinkOps().LinkList()
if err != nil {
return fmt.Errorf("failed to list links on the node, err: %v", err)
}

for _, link := range links {
name := link.Attrs().Name
// Skip if the link is not a vrf type or name is not suffixed with -vrf.
if link.Type() != "vrf" || !strings.HasSuffix(name, types.UDNVRFDeviceSuffix) {
// Skip if the link is not a vrf type or name is not suffixed with -vrf or is not prefixed with mp.
if link.Type() != "vrf" || !strings.HasSuffix(name, types.UDNVRFDeviceSuffix) ||
!strings.HasPrefix(name, types.UDNVRFDevicePrefix) {
continue
}
if !validVRFs.Has(name) {
Expand Down Expand Up @@ -297,20 +312,14 @@ func (vrfm *Controller) DeleteVRF(name string) (err error) {
klog.V(5).Infof("VRF Manager: VRF %s not found in cache for deletion", name)
return nil
}
err = vrfm.deleteVRF(vrf)
err = vrfm.deleteVRF(vrfLink)
if err != nil {
return fmt.Errorf("failed to delete VRF device %s, err: %w", vrf.name, err)
}
return nil
}

func (vrfm *Controller) deleteVRF(vrf vrf) error {
link, err := util.GetNetLinkOps().LinkByName(vrf.name)
if err != nil && util.GetNetLinkOps().IsLinkNotFoundError(err) {
return nil
} else if err != nil {
return fmt.Errorf("failed to retrieve VRF device %s, err: %v", vrf.name, err)
}
func (vrfm *Controller) deleteVRF(link netlink.Link) error {
return util.GetNetLinkOps().LinkDelete(link)
}

Expand All @@ -329,6 +338,7 @@ func getSlaveInterfaceNamesForVRF(vrfLink netlink.Link) (sets.Set[string], error
}

func enslaveInterfaceToVRF(vrfName, ifName string) error {
klog.V(5).Infof("Enslaving interface %s to VRF: %s", ifName, vrfName)
iface, err := util.GetNetLinkOps().LinkByName(ifName)
if err != nil {
return fmt.Errorf("failed to retrieve interface %s, err: %v", ifName, err)
Expand All @@ -343,17 +353,3 @@ func enslaveInterfaceToVRF(vrfName, ifName string) error {
}
return nil
}

func removeInterfaceFromVRF(vrfName, ifName string) error {
iface, err := util.GetNetLinkOps().LinkByName(ifName)
if err != nil && util.GetNetLinkOps().IsLinkNotFoundError(err) {
return nil
} else if err != nil {
return fmt.Errorf("failed to retrieve interface %s, err: %v", ifName, err)
}
err = util.GetNetLinkOps().LinkSetNoMaster(iface)
if err != nil {
return fmt.Errorf("failed to remove interface %s from VRF %s: %v", ifName, vrfName, err)
}
return nil
}
Loading

0 comments on commit d3dafad

Please sign in to comment.