From 618d3bdbb5ccff47c5e0db70bae420a1e97b7ec1 Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Wed, 19 Jul 2023 18:14:04 +0200 Subject: [PATCH] ofproto-dpif: Fix removal of renamed datapath ports. OVS configuration is based on port names and OpenFlow port numbers. Names are stored in the database and translated later to OF ports. On the datapath level, each port has a name and a datapath port number. Port name in the database has to match datapath port name, unless it's a tunnel port. If a datapath port is renamed with 'ip link set DEV name NAME', ovs-vswitchd will wake up, destroy all the OpenFlow-related structures and clean other things up. This is because the port no longer represents the port from a database due to a name difference. However, ovs-vswitch will not actually remove the port from the datapath, because it thinks that this port is no longer there. This is happening because lookup is performed by name and the name have changed. As a result we have a port in a datapath that is not related to any port known to ovs-vswitchd and ovs-vswitchd can't remove it. This port also occupies a datapath port number and prevents the port to be added back with a new name. Fix that by performing lookup by a datapath port number during the port destruction. The name was used only to avoid spurious warnings in a normal case where the port was successfully deleted by other parts of OVS. Adding an extra flag to avoid these warnings instead. Fixes: 02f8d6460afd ("ofproto-dpif: Query port existence by name to prevent warnings.") Reported-at: https://github.com/openvswitch/ovs-issues/issues/284 Tested-by: Alin-Gabriel Serdean Acked-by: Alin-Gabriel Serdean Acked-by: Aaron Conole Signed-off-by: Ilya Maximets --- lib/dpctl.c | 2 +- lib/dpif.c | 16 +++++++---- lib/dpif.h | 2 +- ofproto/ofproto-dpif.c | 14 ++++++---- tests/system-interface.at | 57 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 79 insertions(+), 12 deletions(-) diff --git a/lib/dpctl.c b/lib/dpctl.c index bfc39732950..41b23d8aeb8 100644 --- a/lib/dpctl.c +++ b/lib/dpctl.c @@ -673,7 +673,7 @@ show_dpif(struct dpif *dpif, struct dpctl_params *dpctl_p) } for (int i = 0; i < n_port_nos; i++) { - if (dpif_port_query_by_number(dpif, port_nos[i], &dpif_port)) { + if (dpif_port_query_by_number(dpif, port_nos[i], &dpif_port, true)) { continue; } diff --git a/lib/dpif.c b/lib/dpif.c index 3305401fe01..4397aeaf4fe 100644 --- a/lib/dpif.c +++ b/lib/dpif.c @@ -701,13 +701,14 @@ dpif_port_set_config(struct dpif *dpif, odp_port_t port_no, * initializes '*port' appropriately; on failure, returns a positive errno * value. * - * Retuns ENODEV if the port doesn't exist. + * Retuns ENODEV if the port doesn't exist. Will not log a warning in this + * case unless 'warn_if_not_found' is true. * * The caller owns the data in 'port' and must free it with * dpif_port_destroy() when it is no longer needed. */ int dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, - struct dpif_port *port) + struct dpif_port *port, bool warn_if_not_found) { int error = dpif->dpif_class->port_query_by_number(dpif, port_no, port); if (!error) { @@ -715,8 +716,13 @@ dpif_port_query_by_number(const struct dpif *dpif, odp_port_t port_no, dpif_name(dpif), port_no, port->name); } else { memset(port, 0, sizeof *port); - VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", - dpif_name(dpif), port_no, ovs_strerror(error)); + if (error == ENODEV && !warn_if_not_found) { + VLOG_DBG_RL(&dpmsg_rl, "%s: failed to query port %"PRIu32": %s", + dpif_name(dpif), port_no, ovs_strerror(error)); + } else { + VLOG_WARN_RL(&error_rl, "%s: failed to query port %"PRIu32": %s", + dpif_name(dpif), port_no, ovs_strerror(error)); + } } return error; } @@ -784,7 +790,7 @@ dpif_port_get_name(struct dpif *dpif, odp_port_t port_no, ovs_assert(name_size > 0); - error = dpif_port_query_by_number(dpif, port_no, &port); + error = dpif_port_query_by_number(dpif, port_no, &port, true); if (!error) { ovs_strlcpy(name, port.name, name_size); dpif_port_destroy(&port); diff --git a/lib/dpif.h b/lib/dpif.h index 129cbf6a1d5..2a364972095 100644 --- a/lib/dpif.h +++ b/lib/dpif.h @@ -461,7 +461,7 @@ void dpif_port_clone(struct dpif_port *, const struct dpif_port *); void dpif_port_destroy(struct dpif_port *); bool dpif_port_exists(const struct dpif *dpif, const char *devname); int dpif_port_query_by_number(const struct dpif *, odp_port_t port_no, - struct dpif_port *); + struct dpif_port *, bool warn_if_not_found); int dpif_port_query_by_name(const struct dpif *, const char *devname, struct dpif_port *); int dpif_port_get_name(struct dpif *, odp_port_t port_no, diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c index fad7342b0b0..e22ca757ac3 100644 --- a/ofproto/ofproto-dpif.c +++ b/ofproto/ofproto-dpif.c @@ -2161,8 +2161,7 @@ port_destruct(struct ofport *port_, bool del) struct ofproto_dpif *ofproto = ofproto_dpif_cast(port->up.ofproto); const char *devname = netdev_get_name(port->up.netdev); const char *netdev_type = netdev_get_type(port->up.netdev); - char namebuf[NETDEV_VPORT_NAME_BUFSIZE]; - const char *dp_port_name; + struct dpif_port dpif_port; ofproto->backer->need_revalidate = REV_RECONFIGURE; xlate_txn_start(); @@ -2176,9 +2175,13 @@ port_destruct(struct ofport *port_, bool del) del = dpif_cleanup_required(ofproto->backer->dpif); } - dp_port_name = netdev_vport_get_dpif_port(port->up.netdev, namebuf, - sizeof namebuf); - if (del && dpif_port_exists(ofproto->backer->dpif, dp_port_name)) { + /* Don't try to delete ports that are not part of the datapath. */ + if (del && port->odp_port == ODPP_NONE) { + del = false; + } + + if (del && !dpif_port_query_by_number(ofproto->backer->dpif, + port->odp_port, &dpif_port, false)) { /* The underlying device is still there, so delete it. This * happens when the ofproto is being destroyed, since the caller * assumes that removal of attached ports will happen as part of @@ -2186,6 +2189,7 @@ port_destruct(struct ofport *port_, bool del) if (!port->is_tunnel) { dpif_port_del(ofproto->backer->dpif, port->odp_port, false); } + dpif_port_destroy(&dpif_port); } else if (del) { /* The underlying device is already deleted (e.g. tunctl -d). * Calling dpif_port_remove to do local cleanup for the netdev */ diff --git a/tests/system-interface.at b/tests/system-interface.at index 3bf339582dd..15e789a2459 100644 --- a/tests/system-interface.at +++ b/tests/system-interface.at @@ -122,3 +122,60 @@ AT_CHECK([ip link show | grep " genev_sys_[[0-9]]*: .* ovs-system " | diff -u - OVS_TRAFFIC_VSWITCHD_STOP AT_CLEANUP + +AT_SETUP([interface - datapath port rename]) +OVS_TRAFFIC_VSWITCHD_START() + +dnl Not relevant for userspace datapath. +AT_SKIP_IF([! ovs-appctl dpctl/show | grep -q ovs-system]) + +AT_CHECK([ip link add ovs-veth0 type veth peer name ovs-veth1]) +dnl We will rename ovs-veth0, so removing the peer on exit. +on_exit 'ip link del ovs-veth1' + +AT_CHECK([ovs-vsctl add-port br0 ovs-veth0]) + +OVS_WAIT_UNTIL([ip link show | grep -q "ovs-veth0.* ovs-system "]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: ovs-veth0 +]) + +dnl Rename the interface while attached to OVS. +AT_CHECK([ip l set ovs-veth0 name ovs-new-port]) + +dnl Wait for the port to be detached from the OVS datapath. +OVS_WAIT_UNTIL([ip link show | grep "ovs-new-port" | grep -v "ovs-system"]) + +dnl Check that database indicates the error. +AT_CHECK([ovs-vsctl get interface ovs-veth0 error], [0], [dnl +"could not open network device ovs-veth0 (No such device)" +]) + +dnl Check that the port is no longer in the datapath. +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) +]) + +dnl Rename the interface back and check that it is in use again. +AT_CHECK([ip l set ovs-new-port name ovs-veth0]) + +OVS_WAIT_UNTIL([ip link show | grep -q "ovs-veth0.* ovs-system "]) + +AT_CHECK([ovs-vsctl get interface ovs-veth0 error], [0], [dnl +[[]] +]) + +AT_CHECK([ovs-appctl dpctl/show | grep port], [0], [dnl + port 0: ovs-system (internal) + port 1: br0 (internal) + port 2: ovs-veth0 +]) + +OVS_TRAFFIC_VSWITCHD_STOP([" + /could not open network device ovs-veth0 (No such device)/d +"]) +AT_CLEANUP