Skip to content

Commit

Permalink
E2E: add graceful restart and BFD test
Browse files Browse the repository at this point in the history
- check the bgp peering is in place at all times
- use an image with a fix on top of stable 9.1

Signed-off-by: karampok <[email protected]>
  • Loading branch information
karampok committed Nov 7, 2024
1 parent bfeacb8 commit 303ff02
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 19 deletions.
8 changes: 4 additions & 4 deletions config/all-in-one/frr-k8s-prometheus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1177,7 +1177,7 @@ spec:
env:
- name: TINI_SUBREAPER
value: "true"
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
livenessProbe:
failureThreshold: 3
httpGet:
Expand Down Expand Up @@ -1210,7 +1210,7 @@ spec:
- --metrics-bind-address=127.0.0.1
command:
- /etc/frr_metrics/frr-metrics
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: frr-metrics
ports:
- containerPort: 7573
Expand All @@ -1224,7 +1224,7 @@ spec:
name: metrics
- command:
- /etc/frr_reloader/frr-reloader.sh
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: reloader
volumeMounts:
- mountPath: /var/run/frr
Expand All @@ -1239,7 +1239,7 @@ spec:
- /bin/sh
- -c
- cp -rLf /tmp/frr/* /etc/frr/
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: cp-frr-files
securityContext:
runAsGroup: 101
Expand Down
8 changes: 4 additions & 4 deletions config/all-in-one/frr-k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1146,7 +1146,7 @@ spec:
env:
- name: TINI_SUBREAPER
value: "true"
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
livenessProbe:
failureThreshold: 3
httpGet:
Expand Down Expand Up @@ -1179,7 +1179,7 @@ spec:
- --metrics-bind-address=127.0.0.1
command:
- /etc/frr_metrics/frr-metrics
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: frr-metrics
ports:
- containerPort: 7573
Expand All @@ -1193,7 +1193,7 @@ spec:
name: metrics
- command:
- /etc/frr_reloader/frr-reloader.sh
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: reloader
volumeMounts:
- mountPath: /var/run/frr
Expand All @@ -1208,7 +1208,7 @@ spec:
- /bin/sh
- -c
- cp -rLf /tmp/frr/* /etc/frr/
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
name: cp-frr-files
securityContext:
runAsGroup: 101
Expand Down
8 changes: 4 additions & 4 deletions config/frr-k8s/frr-k8s.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ spec:
securityContext:
capabilities:
add: ["NET_ADMIN", "NET_RAW", "SYS_ADMIN", "NET_BIND_SERVICE"]
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
env:
- name: TINI_SUBREAPER
value: "true"
Expand Down Expand Up @@ -136,7 +136,7 @@ spec:
failureThreshold: 30
periodSeconds: 5
- name: frr-metrics
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
command: ["/etc/frr_metrics/frr-metrics"]
args:
- --metrics-port=7573
Expand All @@ -152,7 +152,7 @@ spec:
- name: metrics
mountPath: /etc/frr_metrics
- name: reloader
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
command: ["/etc/frr_reloader/frr-reloader.sh"]
volumeMounts:
- name: frr-sockets
Expand Down Expand Up @@ -186,7 +186,7 @@ spec:
securityContext:
runAsUser: 100
runAsGroup: 101
image: quay.io/frrouting/frr:9.1.0
image: quay.io/karampok/frr:9.1-fixbfd
command: ["/bin/sh", "-c", "cp -rLf /tmp/frr/* /etc/frr/"]
volumeMounts:
- name: frr-startup
Expand Down
2 changes: 1 addition & 1 deletion e2etests/e2etest_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ func handleFlags() {
flag.StringVar(&prometheusNamespace, "prometheus-namespace", "monitoring", "the namespace prometheus is running in (if running)")
flag.StringVar(&externalContainers, "external-containers", "", "a comma separated list of external containers names to use for the test. (valid parameters are: ibgp-single-hop / ibgp-multi-hop / ebgp-single-hop / ebgp-multi-hop)")
flag.StringVar(&executor.Kubectl, "kubectl", "kubectl", "the path for the kubectl binary")
flag.StringVar(&frrImage, "frr-image", "quay.io/frrouting/frr:9.1.0", "the image to use for the external frr containers")
flag.StringVar(&frrImage, "frr-image", "quay.io/karampok/frr:9.1-fixbfd", "the image to use for the external frr containers")
flag.StringVar(&k8s.FRRK8sNamespace, "frr-k8s-namespace", "frr-k8s-system", "the namespace frr-k8s is running in")

flag.Parse()
Expand Down
15 changes: 11 additions & 4 deletions e2etests/pkg/config/from_containers.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,11 +88,18 @@ func PeersForContainers(frrs []*frrcontainer.FRR, ipFam ipfamily.Family, options
return res
}

func EnableGracefulRestart(pc *PeersConfig) {
// for _,p := range pc.PeersV4 { // not working, go uses a copy of the value instead of the value itself within a range clause
// p.Neigh.EnableGracefulRestart = true
// }
// EnableSimpleBFD sets the BFD profile of every IPv4 and IPv6 peer in pc to
// "simple".
//
// The peers are addressed by index so that the elements stored in the
// slices are updated, rather than a per-iteration copy of each element.
func EnableSimpleBFD(pc *PeersConfig) {
	for idx := range pc.PeersV4 {
		pc.PeersV4[idx].Neigh.BFDProfile = "simple"
	}
	for idx := range pc.PeersV6 {
		pc.PeersV6[idx].Neigh.BFDProfile = "simple"
	}
}

func EnableGracefulRestart(pc *PeersConfig) {
t := pc.PeersV4
for i := 0; i < len(t); i++ {
t[i].Neigh.EnableGracefulRestart = true
Expand Down
10 changes: 9 additions & 1 deletion e2etests/tests/graceful_restart.go
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func

err := updater.Update(peersConfig.Secrets, frrConfigCR)
Expect(err).NotTo(HaveOccurred(), "apply the CR in k8s api failed")
for _, p := range peersConfig.Peers() {
ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
}

check := func() error {
for _, p := range peersConfig.Peers() {
Expand All @@ -121,12 +124,17 @@ var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart", func
defer ginkgo.GinkgoRecover()
err := k8s.RestartFRRK8sPods(cs)
Expect(err).NotTo(HaveOccurred(), "frr-k8s pods failed to restart")
ginkgo.By("frr-k8s pods are ready")
for _, p := range peersConfig.Peers() {
ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
}
Consistently(check, 10*time.Second, time.Second).ShouldNot(HaveOccurred())
close(c)
}()

// 2*time.Minute is important because that is the Graceful Restart timer.
Consistently(check, 2*time.Minute, time.Second).ShouldNot(HaveOccurred())
Eventually(c, time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s pods are not yet ready")
Eventually(c, time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s not ready or peering not established")
},
ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24"),
ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64"),
Expand Down
153 changes: 153 additions & 0 deletions e2etests/tests/graceful_restart_bfd.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
// SPDX-License-Identifier:Apache-2.0

package tests

import (
"fmt"
"time"

"github.com/onsi/ginkgo/v2"
"github.com/openshift-kni/k8sreporter"
"go.universe.tf/e2etest/pkg/frr/container"
frrcontainer "go.universe.tf/e2etest/pkg/frr/container"

frrk8sv1beta1 "github.com/metallb/frr-k8s/api/v1beta1"
"github.com/metallb/frrk8stests/pkg/config"
"github.com/metallb/frrk8stests/pkg/dump"
"github.com/metallb/frrk8stests/pkg/infra"
"github.com/metallb/frrk8stests/pkg/k8s"
"github.com/metallb/frrk8stests/pkg/k8sclient"
"github.com/metallb/frrk8stests/pkg/routes"
. "github.com/onsi/gomega"
frrconfig "go.universe.tf/e2etest/pkg/frr/config"
"go.universe.tf/e2etest/pkg/ipfamily"
corev1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

clientset "k8s.io/client-go/kubernetes"
)

// E2E spec: a BGP session configured with both graceful restart and BFD must
// keep the advertised prefix on the external peers while the frr-k8s pods are
// being restarted.
var _ = ginkgo.Describe("Establish BGP session with EnableGracefulRestart and BFD", func() {
	var (
		cs       clientset.Interface
		updater  *config.Updater
		reporter *k8sreporter.KubernetesReporter
		nodes    []corev1.Node
	)

	// cleanup resets the BGP configuration of every infra FRR container to the
	// empty config and then removes whatever the updater created in the API.
	cleanup := func(u *config.Updater) error {
		for _, c := range infra.FRRContainers {
			if err := c.UpdateBGPConfigFile(frrconfig.Empty); err != nil {
				return fmt.Errorf("clear config in the infra container failed: %w", err)
			}
		}
		if err := u.Clean(); err != nil {
			return fmt.Errorf("clear config in the API failed: %w", err)
		}
		return nil
	}

	ginkgo.BeforeEach(func() {
		var err error

		reporter = dump.NewK8sReporter(k8s.FRRK8sNamespace)
		updater, err = config.NewUpdater()
		Expect(err).NotTo(HaveOccurred())

		// Start every spec from a clean slate.
		err = cleanup(updater)
		Expect(err).NotTo(HaveOccurred(), "cleanup config in API and infra containers")

		cs = k8sclient.New()
		nodes, err = k8s.Nodes(cs)
		Expect(err).NotTo(HaveOccurred())
	})

	ginkgo.AfterEach(func() {
		// Only collect diagnostics when the spec failed.
		if ginkgo.CurrentSpecReport().Failed() {
			testName := ginkgo.CurrentSpecReport().LeafNodeText
			dump.K8sInfo(testName, reporter)
			dump.BGPInfo(testName, infra.FRRContainers, cs)
		}
	})

	ginkgo.Context("When restarting the frrk8s deamon pods", func() {

		ginkgo.DescribeTable("external BGP peer maintains routes", func(ipFam ipfamily.Family, prefix string) {
			// Pair the external containers of the default VRF with the nodes,
			// turning BFD on for the container side of each session.
			frrs := config.ContainersForVRF(infra.FRRContainers, "")
			for _, c := range frrs {
				// The callback parameter is deliberately not called "container"
				// so it does not shadow the imported container package.
				err := container.PairWithNodes(cs, c, ipFam, func(frr *frrcontainer.FRR) {
					frr.NeighborConfig.BFDEnabled = true
				})
				Expect(err).NotTo(HaveOccurred(), "set frr config in infra containers failed")
			}

			peersConfig := config.PeersForContainers(frrs, ipFam,
				config.EnableAllowAll, config.EnableGracefulRestart, config.EnableSimpleBFD)

			frrConfigCR := frrk8sv1beta1.FRRConfiguration{
				ObjectMeta: metav1.ObjectMeta{
					Name:      "graceful-restart-test",
					Namespace: k8s.FRRK8sNamespace,
				},
				Spec: frrk8sv1beta1.FRRConfigurationSpec{
					BGP: frrk8sv1beta1.BGPConfig{
						// The profile name must match the one that
						// config.EnableSimpleBFD assigns to the neighbors.
						BFDProfiles: []frrk8sv1beta1.BFDProfile{
							{
								Name:             "simple",
								ReceiveInterval:  ptr.To[uint32](1000),
								DetectMultiplier: ptr.To[uint32](3),
							},
						},
						Routers: []frrk8sv1beta1.Router{
							{
								ASN:       infra.FRRK8sASN,
								Neighbors: config.NeighborsFromPeers(peersConfig.PeersV4, peersConfig.PeersV6),
								Prefixes:  []string{prefix},
							},
						},
					},
				},
			}

			err := updater.Update(peersConfig.Secrets, frrConfigCR)
			Expect(err).NotTo(HaveOccurred(), "apply the CR in k8s api failed")
			// Make sure the sessions are up before looking at routes.
			for _, p := range peersConfig.Peers() {
				ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
			}

			// check reports an error as soon as one external peer is missing
			// the advertised prefix.
			check := func() error {
				for _, p := range peersConfig.Peers() {
					err := routes.CheckNeighborHasPrefix(p.FRR, p.FRR.RouterConfig.VRF, prefix, nodes)
					if err != nil {
						return fmt.Errorf("Neigh %s does not have prefix %s: %w", p.FRR.Name, prefix, err)
					}
				}
				return nil
			}

			Eventually(check, time.Minute, time.Second).ShouldNot(HaveOccurred(),
				"route should exist before we restart frr-k8s")

			// c is closed once the pods were restarted and peering was
			// validated again.
			c := make(chan struct{})
			go func() { // go restart frr-k8s while Consistently check that route exists
				// Required so a failed assertion in this goroutine is reported
				// to ginkgo instead of crashing the test binary.
				defer ginkgo.GinkgoRecover()
				err := k8s.RestartFRRK8sPods(cs)
				Expect(err).NotTo(HaveOccurred(), "frr-k8s pods failed to restart")
				ginkgo.By("frr-k8s pods are ready")
				for _, p := range peersConfig.Peers() {
					ValidateFRRPeeredWithNodes(nodes, &p.FRR, ipFam)
				}
				close(c)
			}()

			// 2*time.Minute is important because that is the Graceful Restart timer.
			Consistently(check, 2*time.Minute, time.Second).ShouldNot(HaveOccurred())
			Eventually(c, 2*time.Minute, time.Second).Should(BeClosed(), "restart FRRK8s not ready or peering not established")
		},
			// Plain Entry, not FEntry: a focused entry would make ginkgo run
			// only that spec, skip everything else and exit non-zero.
			ginkgo.Entry("IPV4", ipfamily.IPv4, "192.168.2.0/24"),
			ginkgo.Entry("IPV6", ipfamily.IPv6, "fc00:f853:ccd:e799::/64"),
		)
	})
})
2 changes: 1 addition & 1 deletion e2etests/tests/validate.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func ValidateFRRPeeredWithNodes(nodes []corev1.Node, c *frrcontainer.FRR, ipFami
return fmt.Errorf("failed to match neighbors for %s, %w", c.Name, err)
}
return nil
}, 4*time.Minute, 1*time.Second).ShouldNot(HaveOccurred())
}, 2*time.Minute, 1*time.Second).ShouldNot(HaveOccurred())
}

func ValidatePrefixesForNeighbor(neigh frrcontainer.FRR, nodes []v1.Node, prefixes ...string) {
Expand Down

0 comments on commit 303ff02

Please sign in to comment.