Skip to content

Commit

Permalink
tests: Fix multiple ovn-ic race conditions.
Browse files Browse the repository at this point in the history
Multiple ovn-ic tests were failing flakily in the following cases:
- There are two availability zones (az1 & az2)
- Ports and routes are added in az2
  ovn_as az2 ovn-nbctl lrp-add lr.. lrp..
  ovn_as az2 ovn-nbctl lr-route-add ...
- We run check ovn-ic-nbctl --wait=sb sync
- We expect route to be learned on az1 (ovn_as az1 ovn-nbctl lr-route-list ...).

We have a few race conditions:
A) The wait=sb sync wait for the seq_nb to be written in ic-nb, read by az1/ovn-ic and az2/ovn-ic,
   which both update their az/seq_nb. When both are updated, one az/ovn-ic updates ic-sb/seq_nb.
   There is no check on az2/sb. If az2/sb is slow, ovn-ic-nbctl --wait=sb sync (and as az1 ovn-nbctl lr-route-list)
   might be completed before the route update is notified to ic-sb and learned by az1.
   -> This can be avoided/fixed by
     1) Adding ovn-nbctl --wait=sb in ovn-nbctl lrp-add
     2) Implementing a ovn-ic-nbctl --wait=hv sync which would go until az/sb. This is not really necessary here as [1] is fine.

B) When ovn-nbctl lrp-add lr12 returns, it's in az2/nb (i.e. a new LRP in az2/nb)
  - After ovn-ic-nbctl --wait=sb sync, if az1/ovn-ic runs first:
    az1/ic write az1/seq_nb in ic/sb
    Next time az2/ovn-ic runs (when handling ovn-ic-nbctl --wait=sb sync), it reads the route related info and writes in (as well as seq_nb) in ic-sb
    ovn-ic-nbctl --wait=sb sync returns, but route related info is only in ic/sb. No guarantee it has been handled by az1/ovn-ic.
    -> This can be handled/fixed by
    3) Calling twice wait=sb sync
    4) Implementing a ovn-ic-nbctl --wait=sb sync making sure an update from one az is handled by the other az (implementing [3]).
    [4]  is not really necessary here as [3] is fine.

Signed-off-by: Xavier Simonart <[email protected]>
Acked-by: Mark Michelson <[email protected]>
Signed-off-by: Mark Michelson <[email protected]>
  • Loading branch information
simonartxavier authored and putnopvut committed Sep 11, 2024
1 parent 39030ed commit d81738a
Showing 1 changed file with 31 additions and 16 deletions.
47 changes: 31 additions & 16 deletions tests/ovn-ic.at
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ done
create_ic_infra() {
az_id=$1
ts_id=$2
az=az$i
az=az$1

lsp=lsp${az_id}-${ts_id}
lrp=lrp${az_id}-${ts_id}
Expand All @@ -184,15 +184,15 @@ create_ic_infra() {

check ovn-ic-nbctl --wait=sb ts-add $ts
check ovn-nbctl lr-add $lr
check ovn-nbctl lrp-add $lr $lrp 00:00:00:00:00:0$az_id 10.0.$az_id.1/24
check ovn-nbctl --wait=sb lrp-add $lr $lrp 00:00:00:00:00:0$az_id 10.0.$az_id.1/24
check ovn-nbctl lrp-set-gateway-chassis $lrp gw-$az

check ovn-nbctl lsp-add $ts $lsp -- \
lsp-set-addresses $lsp router -- \
lsp-set-type $lsp router -- \
lsp-set-options $lsp router-port=$lrp

check ovn-nbctl lr-route-add $lr 192.168.0.0/16 10.0.$az_id.10
check ovn-nbctl --wait=sb lr-route-add $lr 192.168.0.0/16 10.0.$az_id.10
}

create_ic_infra 1 1
Expand All @@ -209,7 +209,7 @@ check_row_count ic-sb:Route 3 ip_prefix=192.168.0.0/16
check ovn-ic-nbctl --wait=sb ts-del ts1-1
ovn-ic-sbctl list route
ovn-ic-nbctl list transit_switch
checl_row_count ic-sb:route 2 ip_prefix=192.168.0.0/16
check_row_count ic-sb:route 2 ip_prefix=192.168.0.0/16
ovn-ic-sbctl list route

for i in 1 2; do
Expand Down Expand Up @@ -255,21 +255,19 @@ for i in 1 2; do
check ovn-nbctl lrp-add lr1 lrp$i 00:00:00:00:0$i:01 10.0.$i.1/24
check ovn-nbctl lrp-set-gateway-chassis lrp$i gw-az$i

check ovn-nbctl lsp-add ts1 lsp$i -- \
check ovn-nbctl --wait=sb lsp-add ts1 lsp$i -- \
lsp-set-addresses lsp$i router -- \
lsp-set-type lsp$i router -- \
lsp-set-options lsp$i router-port=lrp$i
done

ovn_as az1

ovn-nbctl \
--id=@id create logical-router-static-route ip_prefix=1.1.1.1/32 nexthop=10.0.1.10 -- \
add logical-router lr1 static_routes @id
ovn-nbctl \
--id=@id create logical-router-static-route ip_prefix=1.1.1.1/32 nexthop=10.0.1.10 -- \
add logical-router lr1 static_routes @id

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
check_row_count ic-sb:route 1 ip_prefix=1.1.1.1/32

Expand Down Expand Up @@ -455,13 +453,15 @@ Route Table <main>:
# Delete route in AZ1, AZ2's learned route should be deleted.
ovn_as az1 ovn-nbctl lr-route-del lr1 10.11.1.0/24
ovn-ic-nbctl --wait=sb sync
ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep -c learned], [1], [dnl
0
])

# Add the route back
ovn_as az1 ovn-nbctl lr-route-add lr1 10.11.1.0/24 169.254.0.1
ovn-ic-nbctl --wait=sb sync
ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2 | grep -c learned], [0], [dnl
1
])
Expand All @@ -485,6 +485,7 @@ ovn_as az1 ovn-nbctl set nb_global . options:ic-route-adv=false
# AZ2 shouldn't have the route learned, because AZ1 should have stopped
# advertising.
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2], [0], [dnl
IPv4 Routes
Route Table <main>:
Expand All @@ -499,6 +500,7 @@ ovn_as az1 ovn-nbctl lr-route-add lr1 0.0.0.0/0 169.254.0.3
ovn_as az1 ovn-nbctl set nb_global . options:ic-route-adv=true
ovn_as az1 ovn-nbctl set nb_global . options:ic-route-learn=true
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync

# Default route should NOT get advertised or learned, by default.
AT_CHECK([ovn_as az2 ovn-nbctl lr-route-list lr2], [0], [dnl
Expand Down Expand Up @@ -576,7 +578,7 @@ for i in 1 2; do
done

# Create directly-connected routes
ovn_as az2 ovn-nbctl lrp-add lr12 lrp-lr12-ls2 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
ovn_as az2 ovn-nbctl --wait=sb lrp-add lr12 lrp-lr12-ls2 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
ovn_as az2 ovn-nbctl lr-route-add lr12 10.10.10.0/24 192.168.0.10
ovn_as az1 ovn-nbctl --wait=sb sync

Expand All @@ -585,6 +587,7 @@ ovn_as az1 ovn-nbctl show
echo az2
ovn_as az2 ovn-nbctl show
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync

# Test routes from lr12 were learned to lr11
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr11 |
Expand Down Expand Up @@ -626,7 +629,7 @@ for i in 1 2; do
ovn-nbctl lrp-add lr$i lrp-lr$i-p$i 00:00:00:00:00:0$i 192.168.$i.1/24

# Create static routes
ovn-nbctl lr-route-add lr$i 10.11.$i.0/24 169.254.0.1
ovn-nbctl --wait=sb lr-route-add lr$i 10.11.$i.0/24 169.254.0.1

# Create a src-ip route, which shouldn't be synced
ovn-nbctl --policy=src-ip lr-route-add lr$i 10.22.$i.0/24 169.254.0.2
Expand Down Expand Up @@ -665,7 +668,6 @@ ovn-ic-nbctl ts-add ts1
for i in 1 2; do
ovn_start az$i
ovn_as az$i

# Enable route learning at AZ level
ovn-nbctl set nb_global . options:ic-route-learn=true
# Enable route advertising at AZ level
Expand All @@ -680,9 +682,10 @@ for i in 1 2; do
-- lsp-set-type lsp-ts1-lr$i router \
-- lsp-set-options lsp-ts1-lr$i router-port=lrp-lr$i-ts1

ovn-nbctl lrp-add lr$i lrp-lr$i-p$i 00:00:00:00:00:0$i 2002:db8:1::$i/64
ovn-nbctl --wait=sb lrp-add lr$i lrp-lr$i-p$i 00:00:00:00:00:0$i 2002:db8:1::$i/64
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1 | awk '/learned/{print $1, $2}'], [0], [dnl
2002:db8:1::/64 2001:db8:1::2
Expand Down Expand Up @@ -733,6 +736,7 @@ for i in 1 2; do
ovn-nbctl --policy=src-ip --route-table=rtb1 lr-route-add lr$i 10.22.$i.0/24 169.254.0.2
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1], [0], [dnl
IPv4 Routes
Expand All @@ -750,6 +754,7 @@ for i in 1 2; do
ovn_as az$i ovn-nbctl --route-table=rtb1 lr-route-add lr$i 10.11.$i.0/24 169.254.0.1
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
# ensure route from rtb1 is not learned to any route table as route table is
# not set to TS port
Expand Down Expand Up @@ -975,6 +980,7 @@ ovn_as az2 ovn-nbctl --route-table=rtb3 lr-route-add lr12 10.10.10.0/24 192.168.
# Create directly-connected route in VPC2
ovn_as az2 ovn-nbctl --wait=sb lrp-add lr22 lrp-lr22 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
# Test direct routes from lr12 were learned to lr11
OVS_WAIT_FOR_OUTPUT([ovn_as az1 ovn-nbctl lr-route-list lr11 | grep 192.168 |
grep learned | awk '{print $1, $2, $5}' | sort ], [0], [dnl
Expand Down Expand Up @@ -1102,6 +1108,10 @@ ovn_as az2 ovn-nbctl --route-table=rtb3 lr-route-add lr12 2001:db8:aaaa::/64 200
ovn_as az2 ovn-nbctl --wait=sb lrp-add lr22 lrp-lr22 aa:aa:aa:aa:bb:01 "2001:db8:200::1/64"

# Test direct routes from lr12 were learned to lr11
#
# We need to wait twice: first time for az2/ic to handle port addition and update ic/sb and
# second time for az1/ic to handle ic/sb update.
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr11 | grep 2001:db8:200 |
grep learned | awk '{print $1, $2, $5}' | sort], [0], [dnl
Expand Down Expand Up @@ -1177,6 +1187,7 @@ for i in 1 2; do
check ovn-nbctl --wait=sb lr-route-add $lr 0.0.0.0/0 192.168.$i.11
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr11 | grep dst-ip | sort] , [0], [dnl
0.0.0.0/0 192.168.1.11 dst-ip
Expand Down Expand Up @@ -1249,14 +1260,14 @@ for i in 1 2; do
-- lsp-set-options $lsp router-port=$lrp
done


# Create directly-connected routes
ovn_as az1 ovn-nbctl lrp-add lr11 lrp-lr11 aa:aa:aa:aa:bb:01 "192.168.0.1/24"
ovn_as az2 ovn-nbctl lrp-add lr21 lrp-lr21 aa:aa:aa:aa:bc:01 "192.168.1.1/24"
ovn_as az2 ovn-nbctl lrp-add lr22 lrp-lr22 aa:aa:aa:aa:bc:02 "192.168.2.1/24"
ovn_as az2 ovn-nbctl --wait=sb lrp-add lr22 lrp-lr22 aa:aa:aa:aa:bc:02 "192.168.2.1/24"

# Test direct routes from lr21 and lr22 were learned to lr11
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr11 | grep 192.168 |
grep learned | awk '{print $1, $2}' | sort ], [0], [dnl
192.168.1.0/24 169.254.10.21
Expand Down Expand Up @@ -1335,7 +1346,6 @@ check ovn-ic-nbctl ts-add ts1
for i in 1 2; do
ovn_start az$i
ovn_as az$i

# Enable route learning at AZ level
check ovn-nbctl set nb_global . options:ic-route-learn=true
# Enable route advertising at AZ level
Expand Down Expand Up @@ -1369,10 +1379,11 @@ for i in 1 2; do
33:33:33:33:33:3$i 2005:1734:5678::$i/50

# additional not filtered prefix -> different subnet bits
check ovn-nbctl lrp-add lr$i lrp-lr$i-p-ext4$i \
check ovn-nbctl --wait=sb lrp-add lr$i lrp-lr$i-p-ext4$i \
44:44:44:44:44:4$i 2005:1834:5678::$i/50
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1 |
awk '/learned/{print $1, $2}' ], [0], [dnl
Expand All @@ -1387,6 +1398,7 @@ for i in 1 2; do
check ovn-nbctl remove nb_global . options ic-route-denylist
done

check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
AT_CHECK([ovn_as az1 ovn-nbctl lr-route-list lr1 |
awk '/learned/{print $1, $2}' | sort ], [0], [dnl
Expand Down Expand Up @@ -1750,6 +1762,7 @@ check ovn-nbctl lsp-add ts ts-lr3 \

wait_for_ports_up
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync

ovn_as az1
check ovn-nbctl lsp-set-options ts-lr2 requested-chassis=hv2
Expand Down Expand Up @@ -1970,6 +1983,7 @@ check ovn-nbctl lsp-add ts ts-lr3 \

wait_for_ports_up
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
ovn_as az1
check ovn-nbctl lsp-set-options ts-lr2 requested-chassis=hv2
check ovn-nbctl lsp-set-options ts-lr3 requested-chassis=hv2
Expand Down Expand Up @@ -2223,6 +2237,7 @@ check ovn-nbctl lsp-add ts ts-lr3 \

wait_for_ports_up
check ovn-ic-nbctl --wait=sb sync
check ovn-ic-nbctl --wait=sb sync
ovn_as az1
check ovn-nbctl lsp-set-options ts-lr2 requested-chassis=hv2
check ovn-nbctl lsp-set-options ts-lr3 requested-chassis=hv2
Expand Down

0 comments on commit d81738a

Please sign in to comment.