Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove stale LRSRs from default and network scopped cluster router #4679

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions go-controller/pkg/libovsdb/util/router.go
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
package util

import (
"errors"
"fmt"
"net"

libovsdbclient "github.com/ovn-org/libovsdb/client"
libovsdbops "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/libovsdb/ops"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/util"
"k8s.io/klog/v2"
utilnet "k8s.io/utils/net"
)

Expand Down Expand Up @@ -45,12 +46,11 @@ func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter,
Nexthop: gatewayIP.IP.String(),
Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP,
}
if err = deleteStaleStaticRoutes(nbClient, clusterRouter, subnet.String(), gatewayIP.String()); err != nil {
klog.Errorf("Could not remove stale static route %v", err)
}
p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool {
_, itemCIDR, err := net.ParseCIDR(lrsr.IPPrefix)
if err != nil {
return false
}
return util.ContainsCIDR(subnet, itemCIDR) &&
return lrsr.IPPrefix == subnet.String() &&
lrsr.Nexthop == gatewayIP.IP.String() &&
lrsr.Policy != nil && *lrsr.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP
}
Expand All @@ -60,3 +60,16 @@ func CreateDefaultRouteToExternal(nbClient libovsdbclient.Client, clusterRouter,
}
return nil
}

// deleteStaleStaticRoutes removes static route matching ClusterNetwork CIDR and referencing old join switch IP as NextHop.
func deleteStaleStaticRoutes(nbClient libovsdbclient.Client, clusterRouter, clusterSubnet, gwLRPIP string) error {
p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool {
return lrsr.Nexthop != gwLRPIP && lrsr.IPPrefix == clusterSubnet && lrsr.Policy != nil &&
*lrsr.Policy == nbdb.LogicalRouterStaticRoutePolicySrcIP
}
err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(nbClient, clusterRouter, p)
if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) {
return fmt.Errorf("failed to delete static route from router %s: %v", clusterRouter, err)
}
return nil
}
99 changes: 99 additions & 0 deletions go-controller/pkg/libovsdb/util/router_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package util

import (
"fmt"
"net"
"testing"

"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/config"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/nbdb"
libovsdbtest "github.com/ovn-org/ovn-kubernetes/go-controller/pkg/testing/libovsdb"
"github.com/ovn-org/ovn-kubernetes/go-controller/pkg/types"
)

func TestCreateDefaultRouteToExternal(t *testing.T) {
nodeName := "ovn-control-plane"
_, cidr4, _ := net.ParseCIDR("10.128.0.0/14")
config.Default.ClusterSubnets = []config.CIDRNetworkEntry{{cidr4, 24}}
expectedStaticRoute := &nbdb.LogicalRouterStaticRoute{
Nexthop: "100.64.0.1",
IPPrefix: "10.128.0.0/14",
Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP,
UUID: "route1-UUID",
}
unexpectedStaticRoute := &nbdb.LogicalRouterStaticRoute{
Nexthop: "100.66.0.1",
IPPrefix: "10.128.0.0/14",
Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP,
UUID: "route2-UUID",
}
initialOVNClusterRouter := &nbdb.LogicalRouter{
UUID: types.OVNClusterRouter + "-UUID",
Name: types.OVNClusterRouter,
StaticRoutes: []string{"route2-UUID"},
}
expectedOVNClusterRouter := &nbdb.LogicalRouter{
UUID: types.OVNClusterRouter + "-UUID",
Name: types.OVNClusterRouter,
StaticRoutes: []string{"route1-UUID"},
}
expectedLogicalRouterPort := &nbdb.LogicalRouterPort{
Name: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName,
UUID: types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName + "-UUID",
Networks: []string{"100.64.0.1/16"},
}
GRName := "GR_" + nodeName
expectedOVNGatewayRouter := &nbdb.LogicalRouter{
UUID: GRName + "-UUID",
Name: GRName,
Ports: []string{types.GWRouterToJoinSwitchPrefix + types.GWRouterPrefix + nodeName + "-UUID"},
}

tests := []struct {
desc string
initialNbdb libovsdbtest.TestSetup
expectedNbdb libovsdbtest.TestSetup
}{
{
desc: "removes stale static route and creates new route with current gateway router logical port IP",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for adding the test!!
Would you mind adding one for the first commit too?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure

initialNbdb: libovsdbtest.TestSetup{
NBData: []libovsdbtest.TestData{
unexpectedStaticRoute,
initialOVNClusterRouter,
expectedLogicalRouterPort,
expectedOVNGatewayRouter,
},
},
expectedNbdb: libovsdbtest.TestSetup{
NBData: []libovsdbtest.TestData{
expectedStaticRoute,
expectedOVNClusterRouter,
expectedLogicalRouterPort,
expectedOVNGatewayRouter,
},
},
},
}
for i, tc := range tests {
t.Run(fmt.Sprintf("%d:%s", i, tc.desc), func(t *testing.T) {
nbClient, cleanup, err := libovsdbtest.NewNBTestHarness(tc.initialNbdb, nil)
if err != nil {
t.Fatal(fmt.Errorf("test: \"%s\" failed to create test harness: %v", tc.desc, err))
}
t.Cleanup(cleanup.Cleanup)

var e error
if e = CreateDefaultRouteToExternal(nbClient, types.OVNClusterRouter, GRName); e != nil {
t.Fatal(fmt.Errorf("failed to create pod to external catch-all reroute for gateway router %s, err: %v", GRName, e))
}
matcher := libovsdbtest.HaveDataIgnoringUUIDs(tc.expectedNbdb.NBData)
success, err := matcher.Match(nbClient)
if !success {
t.Fatal(fmt.Errorf("test: \"%s\" didn't match expected with actual, err: %v", tc.desc, matcher.FailureMessage(nbClient)))
}
if err != nil {
t.Fatal(fmt.Errorf("test: \"%s\" encountered error: %v", tc.desc, err))
}
})
}
}
44 changes: 42 additions & 2 deletions go-controller/pkg/ovn/gateway.go
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,6 @@ func (gw *GatewayManager) GatewayInit(
// to the same gateway router
//
// This can be removed once https://bugzilla.redhat.com/show_bug.cgi?id=1891516 is fixed.
// FIXME(trozet): if LRP IP is changed, we do not remove stale instances of these routes
for _, gwLRPIP := range gwLRPIPs {
lrsr := nbdb.LogicalRouterStaticRoute{
IPPrefix: gwLRPIP.String(),
Expand All @@ -526,13 +525,23 @@ func (gw *GatewayManager) GatewayInit(
types.NetworkExternalID: gw.netInfo.GetNetworkName(),
types.TopologyExternalID: gw.netInfo.TopologyType(),
}
if !config.OVNKubernetesFeature.EnableInterconnect {
lrsr.ExternalIDs["nodeName"] = nodeName
}
} else if !config.OVNKubernetesFeature.EnableInterconnect {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Was it intentional that you override all ExternalIDs in case of non-secondary networks?
Otherwise, You could just extract the if and do it once:

if gw.netInfo.IsSecondary() {
...
}
...
if !config.OVNKubernetesFeature.EnableInterconnect {
}

lrsr.ExternalIDs = map[string]string{
"nodeName": nodeName,
}
}
p := func(item *nbdb.LogicalRouterStaticRoute) bool {
return item.IPPrefix == lrsr.IPPrefix &&
libovsdbops.PolicyEqualPredicate(lrsr.Policy, item.Policy)
}

if gw.clusterRouterName != "" {
if err = gw.deleteStaleJoinSubnetRoutes(nodeName, gwLRPIP.String()); err != nil {
klog.Errorf("Could not remove stale static route %v", err)
}
err := libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(gw.nbClient,
gw.clusterRouterName, &lrsr, p, &lrsr.Nexthop)
if err != nil {
Expand Down Expand Up @@ -575,7 +584,6 @@ func (gw *GatewayManager) GatewayInit(
// towards join switch.
mgmtIfAddr := util.GetNodeManagementIfAddr(hostSubnet)
gw.staticRouteCleanup([]net.IP{mgmtIfAddr.IP})

if err := libovsdbops.CreateOrReplaceLogicalRouterStaticRouteWithPredicate(
gw.nbClient,
gw.clusterRouterName,
Expand Down Expand Up @@ -1093,6 +1101,38 @@ func (gw *GatewayManager) staticRouteCleanup(nextHops []net.IP) {
}
}

// deleteStaleJoinSubnetRoutes removes static routes referencing old join switch IP as either NextHop or IPPrefix.
// It acts on following LRSRs:
// 100.64.0.4 100.64.0.4 dst-ip
func (gw *GatewayManager) deleteStaleJoinSubnetRoutes(nodeName, gwLRPIP string) error {
p := func(lrsr *nbdb.LogicalRouterStaticRoute) bool {
if lrsr.Nexthop != gwLRPIP && utilnet.IPFamilyOfString(lrsr.Nexthop) == utilnet.IPFamilyOfString(gwLRPIP) &&
lrsr.Nexthop == lrsr.IPPrefix {
networkName, isSecondaryNetwork := lrsr.ExternalIDs[types.NetworkExternalID]
if isSecondaryNetwork && networkName == gw.netInfo.GetNetworkName() {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This nested if/elseif block could be simplified.
I wonder if there is any benefit in not including the nodeName when EnableInterconnect is true, if we had it would simplify this check.

if !config.OVNKubernetesFeature.EnableInterconnect && lrsr.ExternalIDs["nodeName"] == nodeName {
return true
} else if config.OVNKubernetesFeature.EnableInterconnect {
return true
}
} else if !isSecondaryNetwork {
if !config.OVNKubernetesFeature.EnableInterconnect && lrsr.ExternalIDs["nodeName"] == nodeName {
return true
} else if config.OVNKubernetesFeature.EnableInterconnect {
return true
}
}
}
return false
}

err := libovsdbops.DeleteLogicalRouterStaticRoutesWithPredicate(gw.nbClient, gw.clusterRouterName, p)
if err != nil && !errors.Is(err, libovsdbclient.ErrNotFound) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think that DeleteLogicalRouterStaticRoutesWithPredicate returns libovsdbclient.ErrNotFound

return fmt.Errorf("failed to delete static route from router %s: %v", gw.clusterRouterName, err)
}
return nil
}

// policyRouteCleanup cleans up all policies on cluster router that have a nextHop
// in the provided list.
// - if the LRP exists and has the len(nexthops) > 1: it removes
Expand Down
107 changes: 96 additions & 11 deletions go-controller/pkg/ovn/gateway_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ func generateGatewayInitExpectedNB(testData []libovsdbtest.TestData, expectedOVN
UUID: joinStaticRouteNamedUUID,
IPPrefix: joinLRPIP.IP.String(),
Nexthop: joinLRPIP.IP.String(),
ExternalIDs: map[string]string{
"nodeName": nodeName,
},
})
}
var options map[string]string
Expand Down Expand Up @@ -672,6 +675,99 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
})

ginkgo.It("removes stale static route for old join subnet", func() {
routeUUID1 := "route1-UUID"
leftoverMasqueradeIPRoute1 := &nbdb.LogicalRouterStaticRoute{
Nexthop: "100.66.0.2",
IPPrefix: "100.66.0.2",
UUID: routeUUID1,
}
routeUUID2 := "route2-UUID"
leftoverMasqueradeIPRoute2 := &nbdb.LogicalRouterStaticRoute{
Nexthop: "100.66.0.2",
IPPrefix: "10.130.0.0/23",
Policy: &nbdb.LogicalRouterStaticRoutePolicySrcIP,
UUID: routeUUID2,
}
expectedOVNClusterRouter := &nbdb.LogicalRouter{
UUID: types.OVNClusterRouter + "-UUID",
Name: types.OVNClusterRouter,
}
expectedNodeSwitch := &nbdb.LogicalSwitch{
UUID: nodeName + "-UUID",
Name: nodeName,
}
expectedClusterLBGroup := &nbdb.LoadBalancerGroup{
UUID: types.ClusterLBGroupName + "-UUID",
Name: types.ClusterLBGroupName,
}
expectedSwitchLBGroup := &nbdb.LoadBalancerGroup{
UUID: types.ClusterSwitchLBGroupName + "-UUID",
Name: types.ClusterSwitchLBGroupName,
}
expectedRouterLBGroup := &nbdb.LoadBalancerGroup{
UUID: types.ClusterRouterLBGroupName + "-UUID",
Name: types.ClusterRouterLBGroupName,
}
fakeOvn.startWithDBSetup(libovsdbtest.TestSetup{
NBData: []libovsdbtest.TestData{
// tests migration from local to shared
leftoverMasqueradeIPRoute1,
leftoverMasqueradeIPRoute2,
&nbdb.LogicalSwitch{
UUID: types.OVNJoinSwitch + "-UUID",
Name: types.OVNJoinSwitch,
},
expectedOVNClusterRouter,
expectedNodeSwitch,
expectedClusterLBGroup,
expectedSwitchLBGroup,
expectedRouterLBGroup,
},
})

clusterIPSubnets := ovntest.MustParseIPNets("10.128.0.0/14")
hostSubnets := ovntest.MustParseIPNets("10.130.0.0/23")
joinLRPIPs := ovntest.MustParseIPNets("100.64.0.3/16")
defLRPIPs := ovntest.MustParseIPNets("100.64.0.1/16")
l3GatewayConfig := &util.L3GatewayConfig{
Mode: config.GatewayModeLocal,
ChassisID: "SYSTEM-ID",
InterfaceID: "INTERFACE-ID",
MACAddress: ovntest.MustParseMAC("11:22:33:44:55:66"),
IPAddresses: ovntest.MustParseIPNets("169.255.33.2/24"),
NextHops: ovntest.MustParseIPs("169.255.33.1"),
NodePortEnable: true,
}
sctpSupport := false

var err error
fakeOvn.controller.defaultCOPPUUID, err = EnsureDefaultCOPP(fakeOvn.nbClient)
gomega.Expect(err).NotTo(gomega.HaveOccurred())

err = newGatewayManager(fakeOvn, nodeName).GatewayInit(
nodeName,
clusterIPSubnets,
hostSubnets,
l3GatewayConfig,
sctpSupport,
joinLRPIPs,
defLRPIPs,
extractExternalIPs(l3GatewayConfig),
true,
)
gomega.Expect(err).NotTo(gomega.HaveOccurred())
testData := []libovsdbtest.TestData{}
skipSnat := false
expectedOVNClusterRouter.StaticRoutes = []string{} // the leftover LGW route should have got deleted
// We don't set up the Allow from mgmt port ACL here
mgmtPortIP := ""
expectedDatabaseState := generateGatewayInitExpectedNB(testData, expectedOVNClusterRouter, expectedNodeSwitch,
nodeName, clusterIPSubnets, hostSubnets, l3GatewayConfig, joinLRPIPs, defLRPIPs, skipSnat, mgmtPortIP,
"1400")
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
})

ginkgo.It("creates an IPv4 gateway in OVN without next hops", func() {
routeUUID := "route-UUID"
leftoverMgmtIPRoute := &nbdb.LogicalRouterStaticRoute{
Expand Down Expand Up @@ -940,7 +1036,6 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))

ginkgo.By("modifying the node join IP")
oldJoinLRPIPs := joinLRPIPs
joinLRPIPs = ovntest.MustParseIPNets("100.64.0.99/16")
expectedOVNClusterRouter.StaticRoutes = []string{}
err = newGatewayManager(fakeOvn, nodeName).GatewayInit(
Expand All @@ -958,16 +1053,6 @@ var _ = ginkgo.Describe("Gateway Init Operations", func() {
expectedDatabaseState = generateGatewayInitExpectedNB(testData, expectedOVNClusterRouter, expectedNodeSwitch,
nodeName, clusterIPSubnets, hostSubnets, l3GatewayConfig, joinLRPIPs, defLRPIPs, skipSnat, mgmtPortIP,
"1400")
// FIXME(trozet): we do not clean up stale routes after join IP changes
for i, joinLRPIP := range oldJoinLRPIPs {
joinStaticRouteNamedUUID := fmt.Sprintf("hack-join-static-route-ovn-cluster-router-%v-UUID", i)
expectedOVNClusterRouter.StaticRoutes = append(expectedOVNClusterRouter.StaticRoutes, joinStaticRouteNamedUUID)
expectedDatabaseState = append(expectedDatabaseState, &nbdb.LogicalRouterStaticRoute{
UUID: joinStaticRouteNamedUUID,
IPPrefix: joinLRPIP.IP.String(),
Nexthop: joinLRPIP.IP.String(),
})
}
gomega.Eventually(fakeOvn.nbClient).Should(libovsdbtest.HaveData(expectedDatabaseState))
})

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -699,7 +699,7 @@ func expectedLogicalRouterPolicy(routerPolicyUUID1 string, netInfo util.NetInfo,
}

func expectedGRStaticRoute(uuid, ipPrefix, nextHop string, policy *nbdb.LogicalRouterStaticRoutePolicy, outputPort *string, netInfo util.NetInfo) *nbdb.LogicalRouterStaticRoute {
return &nbdb.LogicalRouterStaticRoute{
lrsr := &nbdb.LogicalRouterStaticRoute{
UUID: uuid,
IPPrefix: ipPrefix,
OutputPort: outputPort,
Expand All @@ -710,6 +710,10 @@ func expectedGRStaticRoute(uuid, ipPrefix, nextHop string, policy *nbdb.LogicalR
ovntypes.TopologyExternalID: netInfo.TopologyType(),
},
}
if !config.OVNKubernetesFeature.EnableInterconnect && ipPrefix == nextHop {
lrsr.ExternalIDs["nodeName"] = nodeName
}
return lrsr
}

func allowAllFromMgmtPort(aclUUID string, mgmtPortIP string, switchName string) *nbdb.ACL {
Expand Down
Loading
Loading