Skip to content

Commit

Permalink
extend-table: Fix table ID double allocation after OVS restart.
Browse files Browse the repository at this point in the history
There were problems observed occasionally after OVS restart, the
OVS flow bundle installation from ovn-controller was failed because of
"GROUP_EXISTS" error, which end up with missing flows/groups/meters
in OVS until ovn-controller is restarted.

Example error logs in OVS:
2022-07-08T01:38:22.837Z|00676|connmgr|INFO|br-int<->unix#0: sending OFPGMFC_GROUP_EXISTS error reply to OFPT_BUNDLE_ADD_MESSAGE message
2022-07-08T01:38:22.913Z|00677|connmgr|INFO|br-int<->unix#0: sending OFPBFC_MSG_FAILED error reply to OFPT_BUNDLE_CONTROL message

The root cause is that with ovn-ofctrl-wait-before-clear set, ofctrl
module would call ovn_extend_table_clear() to clear the "existing" group
table AFTER ovn-controller finished computing the desired
flows/groups/meters in the state S_CLEAR. However, the function
ovn_extend_table_clear() clears the bitmap of the group IDs, while the
IDs are still being used by the "desired" group table. This is not a
problem if a recompute happens soon, the desired group table will be
cleared first and IDs will be reallocated and the bitmap will reflect
the actual allocations. However, if there are any group creation changes
(related to LB, ECMP, etc.) happen before the recompute, new IDs may be
allocated to be conflict with existing IDs because the cleared bitmap
status doesn't reflect the real IDs being used. The conflict IDs finally
causes the "GROUP_EXIST" error replied by OVS when ovn-controller tries
to install the desired groups to OVS. Even worse, because the group
modifications are now wrapped in a bundle with flow modifications, it
would end up with not only missing groups but also missing flows.

Both desired table and existing table share the same bitmap, which is to
avoid reallocating an ID that still exists in OVS, but the current
logic seems to have an assumption that the "existing" table entries are
deleted always AFTER the "desired" entries. This assumption is not true
after the introduction of ovn-ofctrl-wait-before-clear feature.

The fix here is to introduce a reference between the desired and
existing entries, so that when deleting an entry in either of the tables
it knows if the ID is still in use by its peer and decide if it is the
right time to clear the bit from the bitmap, without depending on the
order of deletion.

Fixes: 896adfd ("ofctrl: Support ovn-ofctrl-wait-before-clear to reduce down time during upgrade.")
Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=2112111
Signed-off-by: Han Zhou <[email protected]>
Acked-by: Numan Siddique <[email protected]>
(cherry picked from commit db15cf2)
  • Loading branch information
hzhou8 committed Jul 29, 2022
1 parent 0e3ee19 commit 3affdfe
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 37 deletions.
67 changes: 35 additions & 32 deletions lib/extend-table.c
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,17 @@ ovn_extend_table_init(struct ovn_extend_table *table)
}

static struct ovn_extend_table_info *
ovn_extend_table_info_alloc(const char *name, uint32_t id, bool is_new_id,
ovn_extend_table_info_alloc(const char *name, uint32_t id,
struct ovn_extend_table_info *peer,
uint32_t hash)
{
struct ovn_extend_table_info *e = xmalloc(sizeof *e);
e->name = xstrdup(name);
e->table_id = id;
e->new_table_id = is_new_id;
e->peer = peer;
if (peer) {
peer->peer = e;
}
e->hmap_node.hash = hash;
hmap_init(&e->references);
return e;
Expand Down Expand Up @@ -184,9 +188,10 @@ ovn_extend_table_clear(struct ovn_extend_table *table, bool existing)
/* Clear the target table. */
HMAP_FOR_EACH_SAFE (g, hmap_node, target) {
hmap_remove(target, &g->hmap_node);
/* Don't unset bitmap for desired group_info if the group_id
* was not freshly reserved. */
if (existing || g->new_table_id) {
if (g->peer) {
g->peer->peer = NULL;
} else {
/* Unset the bitmap because the peer is deleted already. */
bitmap_set0(table->table_ids, g->table_id);
}
ovn_extend_table_info_destroy(g);
Expand All @@ -209,11 +214,15 @@ void
ovn_extend_table_remove_existing(struct ovn_extend_table *table,
struct ovn_extend_table_info *existing)
{
/* Remove 'existing' from 'groups->existing' */
/* Remove 'existing' from 'table->existing' */
hmap_remove(&table->existing, &existing->hmap_node);

/* Dealloc group_id. */
bitmap_set0(table->table_ids, existing->table_id);
if (existing->peer) {
existing->peer->peer = NULL;
} else {
/* Dealloc the ID. */
bitmap_set0(table->table_ids, existing->table_id);
}
ovn_extend_table_info_destroy(existing);
}

Expand All @@ -230,7 +239,9 @@ ovn_extend_table_delete_desired(struct ovn_extend_table *table,
VLOG_DBG("%s: %s, "UUID_FMT, __func__,
e->name, UUID_ARGS(&l->lflow_uuid));
hmap_remove(&table->desired, &e->hmap_node);
if (e->new_table_id) {
if (e->peer) {
e->peer->peer = NULL;
} else {
bitmap_set0(table->table_ids, e->table_id);
}
ovn_extend_table_info_destroy(e);
Expand All @@ -254,17 +265,6 @@ ovn_extend_table_remove_desired(struct ovn_extend_table *table,
ovn_extend_table_delete_desired(table, l);
}

static struct ovn_extend_table_info*
ovn_extend_info_clone(struct ovn_extend_table_info *source)
{
struct ovn_extend_table_info *clone =
ovn_extend_table_info_alloc(source->name,
source->table_id,
source->new_table_id,
source->hmap_node.hash);
return clone;
}

void
ovn_extend_table_sync(struct ovn_extend_table *table)
{
Expand All @@ -273,11 +273,13 @@ ovn_extend_table_sync(struct ovn_extend_table *table)
/* Copy the contents of desired to existing. */
HMAP_FOR_EACH_SAFE (desired, hmap_node, &table->desired) {
if (!ovn_extend_table_lookup(&table->existing, desired)) {
desired->new_table_id = false;
struct ovn_extend_table_info *clone =
ovn_extend_info_clone(desired);
hmap_insert(&table->existing, &clone->hmap_node,
clone->hmap_node.hash);
struct ovn_extend_table_info *existing =
ovn_extend_table_info_alloc(desired->name,
desired->table_id,
desired,
desired->hmap_node.hash);
hmap_insert(&table->existing, &existing->hmap_node,
existing->hmap_node.hash);
}
}
}
Expand All @@ -289,7 +291,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name,
struct uuid lflow_uuid)
{
uint32_t table_id = 0, hash;
struct ovn_extend_table_info *table_info;
struct ovn_extend_table_info *table_info, *existing_info;

hash = hash_string(name, 0);

Expand All @@ -307,17 +309,18 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name,

/* Check whether we already have an installed entry for this
* combination. */
existing_info = NULL;
HMAP_FOR_EACH_WITH_HASH (table_info, hmap_node, hash, &table->existing) {
if (!strcmp(table_info->name, name)) {
table_id = table_info->table_id;
existing_info = table_info;
table_id = existing_info->table_id;
break;
}
}

bool new_table_id = false;
if (!table_id) {
/* Reserve a new group_id. */
if (!existing_info) {
/* Reserve a new id. */
table_id = bitmap_scan(table->table_ids, 0, 1, MAX_EXT_TABLE_ID + 1);
new_table_id = true;
}

if (table_id == MAX_EXT_TABLE_ID + 1) {
Expand All @@ -327,7 +330,7 @@ ovn_extend_table_assign_id(struct ovn_extend_table *table, const char *name,
}
bitmap_set1(table->table_ids, table_id);

table_info = ovn_extend_table_info_alloc(name, table_id, new_table_id,
table_info = ovn_extend_table_info_alloc(name, table_id, existing_info,
hash);

hmap_insert(&table->desired,
Expand Down
17 changes: 13 additions & 4 deletions lib/extend-table.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,12 @@
* such as the Group Table or Meter Table. */
struct ovn_extend_table {
unsigned long *table_ids; /* Used as a bitmap with value set
* for allocated group ids in either
* desired or existing. */
* for allocated ids in either desired or
* existing (or both). If the same "name"
* exists in both desired and existing tables,
* they must share the same ID. The "peer"
* pointer would tell if the ID is still used by
* the same item in the peer table. */
struct hmap desired;
struct hmap lflow_to_desired; /* Index for looking up desired table
* items from given lflow uuid, with
Expand All @@ -48,8 +52,13 @@ struct ovn_extend_table_info {
struct hmap_node hmap_node;
char *name; /* Name for the table entity. */
uint32_t table_id;
bool new_table_id; /* 'True' if 'table_id' was reserved from
* ovn_extend_table's 'table_ids' bitmap. */
struct ovn_extend_table_info *peer; /* The extend tables exist as pairs,
one for desired items and one for
existing items. "peer" maintains the
link between a pair of items in
these tables. If "peer" is NULL, it
means the counterpart is not created
yet or deleted already. */
struct hmap references; /* The lflows that are using this item, with
* ovn_extend_table_lflow_ref nodes. Only useful
* for items in ovn_extend_table.desired. */
Expand Down
26 changes: 25 additions & 1 deletion tests/ovn-controller.at
Original file line number Diff line number Diff line change
Expand Up @@ -2121,9 +2121,20 @@ check ovs-vsctl -- add-port br-int hv1-vif1 -- \

check ovn-nbctl ls-add ls1

check ovn-nbctl --wait=hv lsp-add ls1 ls1-lp1 \
check ovn-nbctl lsp-add ls1 ls1-lp1 \
-- lsp-set-addresses ls1-lp1 "f0:00:00:00:00:01 10.1.2.3"

check ovn-nbctl lb-add lb1 1.1.1.1 10.1.2.3 \
-- ls-lb-add ls1 lb1

check ovn-nbctl lb-add lb2 2.2.2.2 10.1.2.4 \
-- ls-lb-add ls1 lb2

check ovn-nbctl --wait=hv sync
# There should be 2 group IDs allocated
AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | sort | uniq | wc -l], [0], [2
])

# Set 5 seconds wait time before clearing OVS flows.
check ovs-vsctl set open . external_ids:ovn-ofctrl-wait-before-clear=5000

Expand Down Expand Up @@ -2156,6 +2167,19 @@ lflow_run_2=$(ovn-appctl -t ovn-controller coverage/read-counter lflow_run)
AT_CHECK_UNQUOTED([echo $lflow_run_1], [0], [$lflow_run_2
])

# Restart OVS this time, and wait until flows are reinstalled
OVS_APP_EXIT_AND_WAIT([ovs-vswitchd])
start_daemon ovs-vswitchd --enable-dummy=system -vvconn -vofproto_dpif -vunixctl
OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 10.1.2.4], [0], [ignore])

check ovn-nbctl --wait=hv lb-add lb3 2.2.2.2 10.1.2.5 \
-- ls-lb-add ls1 lb3

# There should be 3 group IDs allocated (this is to ensure the group ID
# allocation is correct after ofctrl state reset.
AT_CHECK([ovn-appctl -t ovn-controller group-table-list | awk '{print $2}' | sort | uniq | wc -l], [0], [3
])

OVN_CLEANUP([hv1])
AT_CLEANUP

Expand Down

0 comments on commit 3affdfe

Please sign in to comment.