From 3affdfe8952fa359ed52aa0cbb28ffb879ba6fb6 Mon Sep 17 00:00:00 2001 From: Han Zhou Date: Tue, 26 Jul 2022 18:46:23 -0700 Subject: [PATCH] extend-table: Fix table ID double allocation after OVS restart. There were problems observed occasionally after OVS restart, the OVS flow bundle installation from ovn-controller was failed because of "GROUP_EXISTS" error, which end up with missing flows/groups/meters in OVS until ovn-controller is restarted. Example error logs in OVS: 2022-07-08T01:38:22.837Z|00676|connmgr|INFO|br-int<->unix#0: sending OFPGMFC_GROUP_EXISTS error reply to OFPT_BUNDLE_ADD_MESSAGE message 2022-07-08T01:38:22.913Z|00677|connmgr|INFO|br-int<->unix#0: sending OFPBFC_MSG_FAILED error reply to OFPT_BUNDLE_CONTROL message The root cause is that with ovn-ofctrl-wait-before-clear set, ofctrl module would call ovn_extend_table_clear() to clear the "existing" group table AFTER ovn-controller finished computing the desired flows/groups/meters in the state S_CLEAR. However, the function ovn_extend_table_clear() clears the bitmap of the group IDs, while the IDs are still being used by the "desired" group table. This is not a problem if a recompute happens soon, the desired group table will be cleared first and IDs will be reallocated and the bitmap will reflect the actual allocations. However, if there are any group creation changes (related to LB, ECMP, etc.) happen before the recompute, new IDs may be allocated to be conflict with existing IDs because the cleared bitmap status doesn't reflect the real IDs being used. The conflict IDs finally causes the "GROUP_EXIST" error replied by OVS when ovn-controller tries to install the desired groups to OVS. Even worse, because the group modifications are now wrapped in a bundle with flow modifications, it would end up with not only missing groups but also missing flows. Both desired table and existing table share the same bitmap, which is to avoid reallocating an ID that still exists in OVS, but the current logic seems to have an assumption that the "existing" table entries are deleted always AFTER the "desired" entries. This assumption is not true after the introduction of ovn-ofctrl-wait-before-clear feature. The fix here is to introduce a reference between the desired and existing entries, so that when deleting an entry in either of the tables it knows if the ID is still in use by its peer and decide if it is the right time to clear the bit from the bitmap, without depending on the order of deletion. Fixes: 896adfd2d8b3 ("ofctrl: Support ovn-ofctrl-wait-before-clear to reduce down time during upgrade.") Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2112111 Signed-off-by: Han Zhou Acked-by: Numan Siddique (cherry picked from commit db15cf29a1f9857b55389f424c5d747406550cb7) --- lib/extend-table.c | 67 +++++++++++++++++++++-------------------- lib/extend-table.h | 17 ++++++++--- tests/ovn-controller.at | 26 +++++++++++++++- 3 files changed, 73 insertions(+), 37 deletions(-) diff --git a/lib/extend-table.c b/lib/extend-table.c index 4c3c4fac22..ebb1a054cb 100644 --- a/lib/extend-table.c +++ b/lib/extend-table.c @@ -40,13 +40,17 @@ ovn_extend_table_init(struct ovn_extend_table *table) } static struct ovn_extend_table_info * -ovn_extend_table_info_alloc(const char *name, uint32_t id, bool is_new_id, +ovn_extend_table_info_alloc(const char *name, uint32_t id, + struct ovn_extend_table_info *peer, uint32_t hash) { struct ovn_extend_table_info *e = xmalloc(sizeof *e); e->name = xstrdup(name); e->table_id = id; - e->new_table_id = is_new_id; + e->peer = peer; + if (peer) { + peer->peer = e; + } e->hmap_node.hash = hash; hmap_init(&e->references); return e; @@ -184,9 +188,10 @@ ovn_extend_table_clear(struct ovn_extend_table *table, bool existing) /* Clear the target table. */ HMAP_FOR_EACH_SAFE (g, hmap_node, target) { hmap_remove(target, &g->hmap_node); - /* Don't unset bitmap for desired group_info if the group_id - * was not freshly reserved. */ - if (existing || g->new_table_id) { + if (g->peer) { + g->peer->peer = NULL; + } else { + /* Unset the bitmap because the peer is deleted already. */ bitmap_set0(table->table_ids, g->table_id); } ovn_extend_table_info_destroy(g); @@ -209,11 +214,15 @@ void ovn_extend_table_remove_existing(struct ovn_extend_table *table, struct ovn_extend_table_info *existing) { - /* Remove 'existing' from 'groups->existing' */ + /* Remove 'existing' from 'table->existing' */ hmap_remove(&table->existing, &existing->hmap_node); - /* Dealloc group_id. */ - bitmap_set0(table->table_ids, existing->table_id); + if (existing->peer) { + existing->peer->peer = NULL; + } else { + /* Dealloc the ID. */ + bitmap_set0(table->table_ids, existing->table_id); + } ovn_extend_table_info_destroy(existing); } @@ -230,7 +239,9 @@ ovn_extend_table_delete_desired(struct ovn_extend_table *table, VLOG_DBG("%s: %s, "UUID_FMT, __func__, e->name, UUID_ARGS(&l->lflow_uuid)); hmap_remove(&table->desired, &e->hmap_node); - if (e->new_table_id) { + if (e->peer) { + e->peer->peer = NULL; + } else { bitmap_set0(table->table_ids, e->table_id); } ovn_extend_table_info_destroy(e); @@ -254,17 +265,6 @@ ovn_extend_table_remove_desired(struct ovn_extend_table *table, ovn_extend_table_delete_desired(table, l); } -static struct ovn_extend_table_info* -ovn_extend_info_clone(struct ovn_extend_table_info *source) -{ - struct ovn_extend_table_info *clone = - ovn_extend_table_info_alloc(source->name, - source->table_id, - source->new_table_id, - source->hmap_node.hash); - return clone; -} - void ovn_extend_table_sync(struct ovn_extend_table *table) { @@ -273,11 +273,13 @@ ovn_extend_table_sync(struct ovn_extend_table *table) /* Copy the contents of desired to existing. */ HMAP_FOR_EACH_SAFE (desired, hmap_node, &table->desired) { if (!ovn_extend_table_lookup(&table->existing, desired)) { - desired->new_table_id = false; - struct ovn_extend_table_info *clone = - ovn_extend_info_clone(desired); - hmap_insert(&table->existing, &clone->hmap_node, - clone->hmap_node.hash); + struct ovn_extend_table_info *existing = + ovn_extend_table_info_alloc(desired->name, + desired->table_id, + desired, + desired->hmap_node.hash); + hmap_insert(&table->existing, &existing->hmap_node, + existing->hmap_node.hash); } } } @@ -289,7 +291,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name, struct uuid lflow_uuid) { uint32_t table_id = 0, hash; - struct ovn_extend_table_info *table_info; + struct ovn_extend_table_info *table_info, *existing_info; hash = hash_string(name, 0); @@ -307,17 +309,18 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name, /* Check whether we already have an installed entry for this * combination. */ + existing_info = NULL; HMAP_FOR_EACH_WITH_HASH (table_info, hmap_node, hash, &table->existing) { if (!strcmp(table_info->name, name)) { - table_id = table_info->table_id; + existing_info = table_info; + table_id = existing_info->table_id; + break; } } - bool new_table_id = false; - if (!table_id) { - /* Reserve a new group_id. */ + if (!existing_info) { + /* Reserve a new id. */ table_id = bitmap_scan(table->table_ids, 0, 1, MAX_EXT_TABLE_ID + 1); - new_table_id = true; } if (table_id == MAX_EXT_TABLE_ID + 1) { @@ -327,7 +330,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name, } bitmap_set1(table->table_ids, table_id); - table_info = ovn_extend_table_info_alloc(name, table_id, new_table_id, + table_info = ovn_extend_table_info_alloc(name, table_id, existing_info, hash); hmap_insert(&table->desired, diff --git a/lib/extend-table.h b/lib/extend-table.h index 6d81877afa..b43a146b4f 100644 --- a/lib/extend-table.h +++ b/lib/extend-table.h @@ -28,8 +28,12 @@ * such as the Group Table or Meter Table. */ struct ovn_extend_table { unsigned long *table_ids; /* Used as a bitmap with value set - * for allocated group ids in either - * desired or existing. */ + * for allocated ids in either desired or + * existing (or both). If the same "name" + * exists in both desired and existing tables, + * they must share the same ID. The "peer" + * pointer would tell if the ID is still used by + * the same item in the peer table. */ struct hmap desired; struct hmap lflow_to_desired; /* Index for looking up desired table * items from given lflow uuid, with @@ -48,8 +52,13 @@ struct ovn_extend_table_info { struct hmap_node hmap_node; char *name; /* Name for the table entity. */ uint32_t table_id; - bool new_table_id; /* 'True' if 'table_id' was reserved from - * ovn_extend_table's 'table_ids' bitmap. */ + struct ovn_extend_table_info *peer; /* The extend tables exist as pairs, + one for desired items and one for + existing items. "peer" maintains the + link between a pair of items in + these tables. If "peer" is NULL, it + means the counterpart is not created + yet or deleted already. */ struct hmap references; /* The lflows that are using this item, with * ovn_extend_table_lflow_ref nodes. Only useful * for items in ovn_extend_table.desired. */ diff --git a/tests/ovn-controller.at b/tests/ovn-controller.at index b8db342b9e..f71977291e 100644 --- a/tests/ovn-controller.at +++ b/tests/ovn-controller.at @@ -2121,9 +2121,20 @@ check ovs-vsctl -- add-port br-int hv1-vif1 -- \ check ovn-nbctl ls-add ls1 -check ovn-nbctl --wait=hv lsp-add ls1 ls1-lp1 \ +check ovn-nbctl lsp-add ls1 ls1-lp1 \ -- lsp-set-addresses ls1-lp1 "f0:00:00:00:00:01 10.1.2.3" +check ovn-nbctl lb-add lb1 1.1.1.1 10.1.2.3 \ +-- ls-lb-add ls1 lb1 + +check ovn-nbctl lb-add lb2 2.2.2.2 10.1.2.4 \ +-- ls-lb-add ls1 lb2 + +check ovn-nbctl --wait=hv sync +# There should be 2 group IDs allocated +AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | sort | uniq | wc -l], [0], [2 +]) + # Set 5 seconds wait time before clearing OVS flows. check ovs-vsctl set open . external_ids:ovn-ofctrl-wait-before-clear=5000 @@ -2156,6 +2167,19 @@ lflow_run_2=$(ovn-appctl -t ovn-controller coverage/read-counter lflow_run) AT_CHECK_UNQUOTED([echo $lflow_run_1], [0], [$lflow_run_2 ]) +# Restart OVS this time, and wait until flows are reinstalled +OVS_APP_EXIT_AND_WAIT([ovs-vswitchd]) +start_daemon ovs-vswitchd --enable-dummy=system -vvconn -vofproto_dpif -vunixctl +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 10.1.2.4], [0], [ignore]) + +check ovn-nbctl --wait=hv lb-add lb3 2.2.2.2 10.1.2.5 \ +-- ls-lb-add ls1 lb3 + +# There should be 3 group IDs allocated (this is to ensure the group ID +# allocation is correct after ofctrl state reset. +AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | sort | uniq | wc -l], [0], [3 +]) + OVN_CLEANUP([hv1]) AT_CLEANUP