Skip to content

Commit

Permalink
Merge pull request ovn-kubernetes#4548 from tssurya/udn-add-vrf-manager
Browse files Browse the repository at this point in the history
UDN: Add VRF Manager
  • Loading branch information
trozet authored Aug 7, 2024
2 parents 0be2a90 + d3dafad commit 27ec2fa
Show file tree
Hide file tree
Showing 18 changed files with 985 additions and 57 deletions.
1 change: 1 addition & 0 deletions go-controller/cmd/ovnkube/ovnkube.go
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,7 @@ func runOvnKube(ctx context.Context, runMode *ovnkubeRunMode, ovnClientset *util
ovnClientset,
watchFactory,
runMode.identity,
wg,
eventRecorder)
if err != nil {
nodeErr = fmt.Errorf("failed to create node network controller: %w", err)
Expand Down
2 changes: 1 addition & 1 deletion go-controller/hack/test-go.sh
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ function testrun {
}

# These packages require root for network namespace manipulation in unit tests
root_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/rulemanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressip")
root_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/network-controller-manager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/iptables" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/rulemanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/routemanager" "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/controllers/egressip")

# These packages are big and require more than the 10m default to run the unit tests
big_pkgs=("github.com/ovn-org/ovn-kubernetes/go-controller/pkg/ovn")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,23 +71,28 @@ func NewNetAttachDefinitionController(
ncm NetworkControllerManager,
wf watchFactory,
) (*NetAttachDefinitionController, error) {
nadInformer := wf.NADInformer()
nadController := &NetAttachDefinitionController{
name: fmt.Sprintf("[%s NAD controller]", name),
netAttachDefLister: nadInformer.Lister(),
networkManager: newNetworkManager(name, ncm),
networks: map[string]util.NetInfo{},
nads: map[string]string{},
name: fmt.Sprintf("[%s NAD controller]", name),
networkManager: newNetworkManager(name, ncm),
networks: map[string]util.NetInfo{},
nads: map[string]string{},
}

config := &controller.ControllerConfig[nettypes.NetworkAttachmentDefinition]{
RateLimiter: workqueue.DefaultControllerRateLimiter(),
Informer: nadInformer.Informer(),
Lister: nadController.netAttachDefLister.List,
Reconcile: nadController.sync,
ObjNeedsUpdate: nadNeedsUpdate,
// this controller is not thread safe
Threadiness: 1,
}

nadInformer := wf.NADInformer()
if nadInformer != nil {
nadController.netAttachDefLister = nadInformer.Lister()
config.Informer = nadInformer.Informer()
config.Lister = nadController.netAttachDefLister.List
}

nadController.controller = controller.NewController(
nadController.name,
config,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"context"
"fmt"
"strings"
"sync"
"time"

"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/cni"
Expand All @@ -12,9 +13,11 @@ import (
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/kube"
nad "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/network-attach-def-controller"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/node/vrfmanager"
ovntypes "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"

"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
Expand All @@ -28,28 +31,58 @@ type nodeNetworkControllerManager struct {
Kube kube.Interface
watchFactory factory.NodeWatchFactory
stopChan chan struct{}
wg *sync.WaitGroup
recorder record.EventRecorder

defaultNodeNetworkController nad.BaseNetworkController

// net-attach-def controller handles net-attach-defs and creates/deletes secondary controllers
// nil in dpu-host mode
nadController *nad.NetAttachDefinitionController
// vrf manager that creates and manages vrfs for all UDNs
vrfManager *vrfmanager.Controller
}

// NewNetworkController creates the secondary node network controller for the
// given NetInfo.
//
// Layer3, Layer2 and localnet topologies are supported; any other topology
// type results in an error. The manager's vrfManager is passed through to the
// new controller; it is only set when network segmentation support is enabled
// at manager construction time, so it may be nil here.
func (ncm *nodeNetworkControllerManager) NewNetworkController(nInfo util.NetInfo) (nad.NetworkController, error) {
	topoType := nInfo.TopologyType()
	switch topoType {
	case ovntypes.Layer3Topology, ovntypes.Layer2Topology, ovntypes.LocalnetTopology:
		controller, err := node.NewSecondaryNodeNetworkController(ncm.newCommonNetworkControllerInfo(), nInfo, ncm.vrfManager)
		if err != nil {
			// Return an untyped nil explicitly: if the constructor returns a
			// concrete pointer type (TODO confirm), forwarding its nil pointer
			// would produce a non-nil nad.NetworkController interface value.
			return nil, err
		}
		return controller, nil
	}
	return nil, fmt.Errorf("topology type %s not supported", topoType)
}

// CleanupDeletedNetworks cleans up all stale entities, given the list of all
// networks that currently exist.
//
// It is a no-op unless network segmentation support is enabled. Otherwise it
// collects the VRF device name of every primary network in validNetworks and
// hands that set to the VRF manager's Repair, returning Repair's error.
// Non-primary networks are ignored.
func (ncm *nodeNetworkControllerManager) CleanupDeletedNetworks(validNetworks ...util.BasicNetInfo) error {
	if !util.IsNetworkSegmentationSupportEnabled() {
		return nil
	}
	validVRFDevices := make(sets.Set[string])
	for _, network := range validNetworks {
		if !network.IsPrimaryNetwork() {
			continue
		}
		networkID, err := ncm.getNetworkID(network)
		if err != nil {
			// Best effort: log and move on to the next network.
			// NOTE(review): the skipped network's VRF device is then missing
			// from the valid set; presumably Repair treats devices outside the
			// set as stale — confirm this is acceptable for a valid network
			// whose ID lookup failed transiently.
			klog.Errorf("Failed to get network identifier for network %s, error: %s", network.GetNetworkName(), err)
			continue
		}
		validVRFDevices.Insert(util.GetVRFDeviceNameForUDN(networkID))
	}
	return ncm.vrfManager.Repair(validVRFDevices)
}

// getNetworkID looks up the numeric identifier of the given network.
//
// The node list is fetched from the watch factory and passed, together with
// the network, to util.GetNetworkID. If either step fails,
// util.InvalidNetworkID is returned along with the underlying error.
func (ncm *nodeNetworkControllerManager) getNetworkID(network util.BasicNetInfo) (int, error) {
	nodeList, listErr := ncm.watchFactory.GetNodes()
	if listErr != nil {
		return util.InvalidNetworkID, listErr
	}

	id, lookupErr := util.GetNetworkID(nodeList, network)
	if lookupErr != nil {
		return util.InvalidNetworkID, lookupErr
	}

	return id, nil
}

// newCommonNetworkControllerInfo creates and returns the base node network controller info
Expand All @@ -67,13 +100,14 @@ func isNodeNADControllerRequired() bool {

// NewNodeNetworkControllerManager creates a new OVN controller manager to manage all the controllers for all networks
func NewNodeNetworkControllerManager(ovnClient *util.OVNClientset, wf factory.NodeWatchFactory, name string,
eventRecorder record.EventRecorder) (*nodeNetworkControllerManager, error) {
wg *sync.WaitGroup, eventRecorder record.EventRecorder) (*nodeNetworkControllerManager, error) {
ncm := &nodeNetworkControllerManager{
name: name,
ovnNodeClient: &util.OVNNodeClientset{KubeClient: ovnClient.KubeClient, AdminPolicyRouteClient: ovnClient.AdminPolicyRouteClient},
Kube: &kube.Kube{KClient: ovnClient.KubeClient},
watchFactory: wf,
stopChan: make(chan struct{}),
wg: wg,
recorder: eventRecorder,
}

Expand All @@ -83,6 +117,9 @@ func NewNodeNetworkControllerManager(ovnClient *util.OVNClientset, wf factory.No
if isNodeNADControllerRequired() {
ncm.nadController, err = nad.NewNetAttachDefinitionController("node-network-controller-manager", ncm, wf)
}
if util.IsNetworkSegmentationSupportEnabled() {
ncm.vrfManager = vrfmanager.NewController()
}
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -145,9 +182,19 @@ func (ncm *nodeNetworkControllerManager) Start(ctx context.Context) (err error)
// nadController is nil if multi-network is disabled
if ncm.nadController != nil {
err = ncm.nadController.Start()
if err != nil {
return fmt.Errorf("failed to start NAD controller: %w", err)
}
}
if ncm.vrfManager != nil {
// Run the VRF manager that will manage VRFs for all UDNs
err = ncm.vrfManager.Run(ncm.stopChan, ncm.wg)
if err != nil {
return fmt.Errorf("failed to run VRF Manager: %w", err)
}
}

return err
return nil
}

// Stop gracefully stops all managed controllers
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,19 @@ package networkControllerManager

import (
"fmt"
"sync"

"github.com/containernetworking/plugins/pkg/ns"
"github.com/containernetworking/plugins/pkg/testutils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
factoryMocks "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/factory/mocks"
ovntest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"

v1 "k8s.io/api/core/v1"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
Expand Down Expand Up @@ -96,15 +100,15 @@ var _ = Describe("Healthcheck tests", func() {
Describe("checkForStaleOVSRepresentorInterfaces", func() {
var ncm *nodeNetworkControllerManager
nodeName := "localNode"
podList := []*v1.Pod{
podList := []*corev1.Pod{
{
ObjectMeta: metav1.ObjectMeta{
Name: "a-pod",
Namespace: "a-ns",
Annotations: map[string]string{},
UID: "pod-a-uuid-1",
},
Spec: v1.PodSpec{
Spec: corev1.PodSpec{
NodeName: nodeName,
},
},
Expand All @@ -115,15 +119,15 @@ var _ = Describe("Healthcheck tests", func() {
Annotations: map[string]string{},
UID: "pod-b-uuid-2",
},
Spec: v1.PodSpec{
Spec: corev1.PodSpec{
NodeName: nodeName,
},
},
}

BeforeEach(func() {
// setup kube output
ncm, err = NewNodeNetworkControllerManager(fakeClient, &factoryMock, nodeName, nil)
ncm, err = NewNodeNetworkControllerManager(fakeClient, &factoryMock, nodeName, &sync.WaitGroup{}, nil)
Expect(err).NotTo(HaveOccurred())
factoryMock.On("GetPods", "").Return(podList, nil)
})
Expand Down Expand Up @@ -163,5 +167,92 @@ var _ = Describe("Healthcheck tests", func() {
Expect(execMock.CalledMatchesExpected()).To(BeTrue(), execMock.ErrorDesc)
})
})

})

// Exercises CleanupDeletedNetworks against VRF links created inside a
// dedicated test network namespace: one link for a network that still exists
// and one for a network ID that has no corresponding network.
Context("verify cleanup of deleted networks", func() {
var (
// staleNetID is not referenced by any node annotation or NAD in this test.
staleNetID uint = 100
nodeName string = "worker1"
// Primary Layer3 NAD for network "bluenet"; it is the only network
// passed to CleanupDeletedNetworks as valid.
nad = ovntest.GenerateNAD("bluenet", "rednad", "greenamespace",
types.Layer3Topology, "100.128.0.0/16", types.NetworkRolePrimary)
netName = "bluenet"
// netID is the ID written for netName into the node's network-ids annotation.
netID = 3
v4NodeSubnet = "10.128.0.0/24"
v6NodeSubnet = "ae70::66/112"
testNS ns.NetNS
fakeClient *util.OVNClientset
)

BeforeEach(func() {
// Restore global default values before each testcase
Expect(config.PrepareTestConfig()).To(Succeed())

// Fresh namespace per test case; it is closed and unmounted in AfterEach.
testNS, err = testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
v1Objects := []runtime.Object{}
fakeClient = &util.OVNClientset{
KubeClient: fake.NewSimpleClientset(v1Objects...),
}
})

AfterEach(func() {
Expect(testNS.Close()).To(Succeed())
Expect(testutils.UnmountNS(testNS)).To(Succeed())
})

It("check vrf devices are cleaned for deleted networks", func() {
// Enable the features before constructing the manager below.
// NOTE(review): presumably these flags drive both the VRF manager
// creation and the early return in CleanupDeletedNetworks — confirm.
config.OVNKubernetesFeature.EnableNetworkSegmentation = true
config.OVNKubernetesFeature.EnableMultiNetwork = true

factoryMock := factoryMocks.NodeWatchFactory{}
NetInfo, err := util.ParseNADInfo(nad)
Expect(err).NotTo(HaveOccurred())
// The node carries the network ID and subnets for netName in its annotations.
node := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName,
Annotations: map[string]string{
"k8s.ovn.org/network-ids": fmt.Sprintf("{\"%s\": \"%d\"}", netName, netID),
"k8s.ovn.org/node-subnets": fmt.Sprintf("{\"%s\":[\"%s\", \"%s\"]}", netName, v4NodeSubnet, v6NodeSubnet)},
},
}
nodeList := []*corev1.Node{node}
factoryMock.On("GetNode", nodeName).Return(nodeList[0], nil)
factoryMock.On("GetNodes").Return(nodeList, nil)
// The mocked watch factory hands back no NAD informer.
factoryMock.On("NADInformer").Return(nil)

ncm, err := NewNodeNetworkControllerManager(fakeClient, &factoryMock, nodeName, &sync.WaitGroup{}, nil)
Expect(err).NotTo(HaveOccurred())

// Everything that touches links runs inside the test namespace.
err = testNS.Do(func(ns.NetNS) error {
defer GinkgoRecover()

// Create the VRF link for the stale network ID and confirm it exists.
staleVrfDevice := util.GetVRFDeviceNameForUDN(int(staleNetID))
ovntest.AddVRFLink(staleVrfDevice, uint32(staleNetID))
_, err = util.GetNetLinkOps().LinkByName(staleVrfDevice)
Expect(err).NotTo(HaveOccurred())

// Create the VRF link for the existing network and confirm it exists.
validVrfDevice := util.GetVRFDeviceNameForUDN(int(netID))
ovntest.AddVRFLink(validVrfDevice, uint32(netID))
_, err = util.GetNetLinkOps().LinkByName(validVrfDevice)
Expect(err).NotTo(HaveOccurred())

// Only NetInfo (bluenet) is reported as a valid network.
err = ncm.CleanupDeletedNetworks(NetInfo)
Expect(err).NotTo(HaveOccurred())

// Verify CleanupDeletedNetworks cleans up VRF configuration for
// already deleted network.
_, err = util.GetNetLinkOps().LinkByName(staleVrfDevice)
Expect(err).To(HaveOccurred())

// Verify CleanupDeletedNetworks didn't cleanup VRF configuration for
// existing network.
_, err = util.GetNetLinkOps().LinkByName(validVrfDevice)
Expect(err).NotTo(HaveOccurred())

return nil
})
Expect(err).NotTo(HaveOccurred())
})
})
})
Loading

0 comments on commit 27ec2fa

Please sign in to comment.