Skip to content

Commit

Permalink
Purge ports without interfaces (#109)
Browse files Browse the repository at this point in the history
As described in GH-106 a node failure leads to the OVS CNI driver's DEL
operation being called without the CNI_NETNS argument. This made the
CNI driver panic.

An empty CNI_NETNS argument is allowed according to version 0.4.0 of the
CNI spec
(https://github.com/containernetworking/cni/blob/spec-v0.4.0/SPEC.md).

The spec suggests to "clean up as many resources as possible
(e.g. releasing IPAM allocations) and return a successful response". In
accordance with this suggestion the OVS CNI driver now looks for
interfaces with an error value set. It then iterates over the ports
belonging to those interfaces and removes them.

Signed-off-by: Ferdinand Hofherr <[email protected]>
  • Loading branch information
fhofherr authored Mar 11, 2020
1 parent 0ae0c0a commit 23d5721
Show file tree
Hide file tree
Showing 3 changed files with 210 additions and 19 deletions.
91 changes: 89 additions & 2 deletions pkg/ovsdb/ovsdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,13 @@ package ovsdb
import (
"errors"
"fmt"
"log"

"github.com/socketplane/libovsdb"
)

const ovsPortOwner = "ovs-cni.network.kubevirt.io"

// OVS driver state
type OvsDriver struct {
// OVS client
Expand Down Expand Up @@ -125,6 +128,14 @@ func (self *OvsBridgeDriver) DeletePort(intfName string) error {
return err
}

externalIDs, err := getExternalIDs(row)
if err != nil {
return fmt.Errorf("get external ids: %v", err)
}
if externalIDs["owner"] != ovsPortOwner {
return fmt.Errorf("port not created by ovs-cni")
}

// We make a select transaction using the interface name
// Then get the Port UUID from it
portUuidStr := row["_uuid"].([]interface{})
Expand All @@ -143,6 +154,33 @@ func (self *OvsBridgeDriver) DeletePort(intfName string) error {
return err
}

func getExternalIDs(row map[string]interface{}) (map[string]string, error) {
rowVal, ok := row["external_ids"]
if !ok {
return nil, fmt.Errorf("row does not contain external_ids")
}

rowValSlice, ok := rowVal.([]interface{})
if !ok || len(rowValSlice) != 2 || rowValSlice[0] != "map" {
return nil, fmt.Errorf("not a OvsMap: %T: %v", rowVal, rowVal)
}
mapVals, ok := rowValSlice[1].([]interface{})
if !ok {
return nil, fmt.Errorf("cannot get map values: %v", rowValSlice[1])
}
extIDs := make(map[string]string, len(rowValSlice))
for _, mapEntry := range mapVals {
me, ok := mapEntry.([]interface{})
if !ok || len(me) != 2 {
return nil, fmt.Errorf("invalid map entry: %v", mapEntry)
}
ks := fmt.Sprintf("%v", me[0])
vs := fmt.Sprintf("%v", me[1])
extIDs[ks] = vs
}
return extIDs, nil
}

func (self *OvsDriver) BridgeList() ([]string, error) {
selectOp := []libovsdb.Operation{{
Op: "select",
Expand Down Expand Up @@ -205,7 +243,11 @@ func (self *OvsDriver) IsBridgePresent(bridgeName string) (bool, error) {

// Return ovs port name for an container interface
func (self *OvsDriver) GetOvsPortForContIface(contIface, contNetnsPath string) (string, bool, error) {
searchMap := map[string]string{"contNetns": contNetnsPath, "contIface": contIface}
searchMap := map[string]string{
"contNetns": contNetnsPath,
"contIface": contIface,
"owner": ovsPortOwner,
}
ovsmap, err := libovsdb.NewOvsMap(searchMap)
if err != nil {
return "", false, err
Expand All @@ -221,6 +263,47 @@ func (self *OvsDriver) GetOvsPortForContIface(contIface, contNetnsPath string) (
return fmt.Sprintf("%v", port["name"]), true, nil
}

func (self *OvsDriver) FindInterfacesWithError() ([]string, error) {
selectOp := libovsdb.Operation{
Op: "select",
Columns: []string{"name", "error"},
Table: "Interface",
}
transactionResult, err := self.ovsdbTransact([]libovsdb.Operation{selectOp})
if err != nil {
return nil, err
}
if len(transactionResult) != 1 {
return nil, fmt.Errorf("no transaction result")
}
operationResult := transactionResult[0]
if operationResult.Error != "" {
return nil, fmt.Errorf(operationResult.Error)
}

var names []string
for _, row := range operationResult.Rows {
if !hasError(row) {
continue
}
names = append(names, fmt.Sprintf("%v", row["name"]))
}
if len(names) > 0 {
log.Printf("found %d interfaces with error", len(names))
}
return names, nil
}

func hasError(row map[string]interface{}) bool {
v := row["error"]
switch x := v.(type) {
case string:
return x != ""
default:
return false
}
}

// ************************ Notification handler for OVS DB changes ****************
func (self *OvsDriver) Update(context interface{}, tableUpdates libovsdb.TableUpdates) {
}
Expand Down Expand Up @@ -318,7 +401,11 @@ func createPortOperation(intfName, contNetnsPath, contIfaceName string, vlanTag
return nil, nil, err
}

oMap, err := libovsdb.NewOvsMap(map[string]string{"contNetns": contNetnsPath, "contIface": contIfaceName})
oMap, err := libovsdb.NewOvsMap(map[string]string{
"contNetns": contNetnsPath,
"contIface": contIfaceName,
"owner": ovsPortOwner,
})
if err != nil {
return nil, nil, err
}
Expand Down
55 changes: 39 additions & 16 deletions pkg/plugin/plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,23 @@ func getOvsPortForContIface(ovsDriver *ovsdb.OvsBridgeDriver, contIface string,
return ovsDriver.GetOvsPortForContIface(contIface, contNetnsPath)
}

// cleanPorts removes all ports whose interfaces have an error.
func cleanPorts(ovsDriver *ovsdb.OvsBridgeDriver) error {
ifaces, err := ovsDriver.FindInterfacesWithError()
if err != nil {
return fmt.Errorf("clean ports: %v", err)
}
for _, iface := range ifaces {
log.Printf("Info: interface %s has error: removing corresponding port", iface)
if err := ovsDriver.DeletePort(iface); err != nil {
// Don't return an error here, just log its occurrence.
// Something else may have removed the port already.
log.Printf("Error: %v\n", err)
}
}
return nil
}

func removeOvsPort(ovsDriver *ovsdb.OvsBridgeDriver, portName string) error {

return ovsDriver.DeletePort(portName)
Expand All @@ -385,10 +402,6 @@ func removeOvsPort(ovsDriver *ovsdb.OvsBridgeDriver, portName string) error {
func CmdDel(args *skel.CmdArgs) error {
logCall("DEL", args)

if args.Netns == "" {
panic("This should never happen, if it does, it means caller does not pass container network namespace as a parameter and therefore OVS port cleanup will not work")
}

envArgs, err := getEnvArgs(args.Args)
if err != nil {
return err
Expand Down Expand Up @@ -421,20 +434,30 @@ func CmdDel(args *skel.CmdArgs) error {
}
}

// Unlike veth pair, OVS port will not be automatically removed when
// container namespace is gone. Find port matching DEL arguments and remove
// it explicitly.
portName, portFound, err := getOvsPortForContIface(ovsDriver, args.IfName, args.Netns)
if err != nil {
return fmt.Errorf("Failed to obtain OVS port for given connection: %v", err)
}

// Do not return an error if the port was not found, it may have been
// already removed by someone.
if portFound {
if err := removeOvsPort(ovsDriver, portName); err != nil {
if args.Netns == "" {
// The CNI_NETNS parameter may be empty according to version 0.4.0
// of the CNI spec (https://github.com/containernetworking/cni/blob/spec-v0.4.0/SPEC.md).
// In accordance with the spec we clean up as many resources as possible.
if err := cleanPorts(ovsDriver); err != nil {
return err
}
return nil
} else {
// Unlike veth pair, OVS port will not be automatically removed when
// container namespace is gone. Find port matching DEL arguments and remove
// it explicitly.
portName, portFound, err := getOvsPortForContIface(ovsDriver, args.IfName, args.Netns)
if err != nil {
return fmt.Errorf("Failed to obtain OVS port for given connection: %v", err)
}

// Do not return an error if the port was not found, it may have been
// already removed by someone.
if portFound {
if err := removeOvsPort(ovsDriver, portName); err != nil {
return err
}
}
}

// Delete can be called multiple times, so don't return an error if the
Expand Down
83 changes: 82 additions & 1 deletion pkg/plugin/plugin_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import (
"strconv"
"strings"
"syscall"
"time"

"github.com/containernetworking/cni/pkg/skel"
"github.com/containernetworking/cni/pkg/types"
"github.com/containernetworking/cni/pkg/types/current"
"github.com/containernetworking/plugins/pkg/ip"
"github.com/containernetworking/plugins/pkg/ns"
"github.com/containernetworking/plugins/pkg/testutils"
"github.com/vishvananda/netlink"
Expand Down Expand Up @@ -406,7 +408,7 @@ var _ = Describe("CNI Plugin", func() {
OvnPort := "test-port"
result := attach(targetNs, conf, IFNAME, "", OvnPort)
hostIface := result.Interfaces[0]
output, err := exec.Command("ovs-vsctl", "--colum=external_ids", "find", "Interface", fmt.Sprintf("name=%s", hostIface.Name)).CombinedOutput()
output, err := exec.Command("ovs-vsctl", "--column=external_ids", "find", "Interface", fmt.Sprintf("name=%s", hostIface.Name)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
Expect(string(output[:len(output)-1])).To(Equal(ovsOutput))
})
Expand Down Expand Up @@ -447,6 +449,73 @@ var _ = Describe("CNI Plugin", func() {
testSplitVlanIds(trunks, nil, errors.New("incorrect trunk maxID parameter"), false)
})
})

Context("purge ports with failed interfaces", func() {
const IFNAME = "eth0"

conf := fmt.Sprintf(`{
"cniVersion": "0.3.1",
"name": "mynet",
"type": "ovs",
"OvnPort": "test-port",
"bridge": "%s"}`, BRIDGE_NAME)

It("DEL removes ports without network namespace", func() {
firstTargetNs, err := testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
defer firstTargetNs.Close()

secondTargetNs, err := testutils.NewNS()
Expect(err).NotTo(HaveOccurred())
defer secondTargetNs.Close()

// Create two ports for two separate target namespaces.
firstResult := attach(firstTargetNs, conf, IFNAME, "", "test-port-1")
secondResult := attach(secondTargetNs, conf, IFNAME, "", "test-port-2")

// Remove the host interface of the first port. This makes the
// port faulty. Our test should remove the interfaces of this
// port, but not the interfaces of the second.
firstHostIface := firstResult.Interfaces[0]
err = ip.DelLinkByName(firstHostIface.Name)
Expect(err).NotTo(HaveOccurred())

// It takes a short while for OVS to notice that we removed the
// interface. Sometimes the test is faster. We therefore wait
// up to 1 second for the interface to fail.
waitForIfaceError(firstHostIface.Name, 10, 100*time.Millisecond)

secondHostIface := secondResult.Interfaces[0]

args := &skel.CmdArgs{
ContainerID: "dummy",
IfName: IFNAME,
StdinData: []byte(conf),
}
err = cmdDelWithArgs(args, func() error {
return CmdDel(args)
})
Expect(err).NotTo(HaveOccurred())

output, err := exec.Command("ovs-vsctl", "--column=name", "find", "Interface", fmt.Sprintf("name=%s", firstHostIface.Name)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
Expect(string(output)).To(Equal(""), "Faulty OVS interface should have been removed")

output, err = exec.Command("ovs-vsctl", "--column=name", "find", "Port", fmt.Sprintf("name=%s", firstHostIface.Name)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
Expect(string(output)).To(Equal(""), "Port with faulty OVS interface should have been removed")

output, err = exec.Command("ovs-vsctl", "--column=name", "find", "Interface", fmt.Sprintf("name=%s", secondHostIface.Name)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
Expect(string(output)).To(
ContainSubstring(fmt.Sprintf("\"%s\"", secondHostIface.Name)), "Healthy OVS interface should have been kept")

output, err = exec.Command("ovs-vsctl", "--column=name", "find", "Port", fmt.Sprintf("name=%s", secondHostIface.Name)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
Expect(string(output)).To(
ContainSubstring(fmt.Sprintf("\"%s\"", secondHostIface.Name)), "OVS port with healthy interface should have been kept")
})
})
})
})

Expand Down Expand Up @@ -512,3 +581,15 @@ func getPortAttribute(portName string, attributeName string) (string, error) {

return strings.TrimSpace(string(output[:])), nil
}

func waitForIfaceError(iface string, tries int, delay time.Duration) {
for i := 0; i < tries; i++ {
output, err := exec.Command("ovs-vsctl", "--column=error", "find", "Interface", fmt.Sprintf("name=%s", iface)).CombinedOutput()
Expect(err).NotTo(HaveOccurred())
if strings.Contains(string(output), fmt.Sprintf("could not open network device %s", iface)) {
return
}
time.Sleep(delay)
}
Fail(fmt.Sprintf("%s failed to reach error status after %d tries", iface, tries))
}

0 comments on commit 23d5721

Please sign in to comment.