Skip to content

Commit

Permalink
Clean up the fdb on the sonic fanout in fdb test (sonic-net#15878)
Browse files Browse the repository at this point in the history
The fanout switch should not learn any FDB entries at all, but it learns tens of thousands of them. The FDB table on the fanout is much bigger than the FDB table on the DUT.
Replace 'show mac' with 'fdbshow' to improve the efficiency

In this case, when a port is shut down from the DUT, the corresponding port on the fanout also goes down. An FDB flush is then executed for this port on the fanout; if there are too many FDB entries on the port, the following events occur on the fanout during the flush:
INFO database#supervisord: redis 40:M 24 Oct 2024 15:23:08.510 #Lua slow script detected: still in execution after 6524 milliseconds. You can try killing the script using the SCRIPT KILL command. Script SHA1 is: 7acccfabe7fbd17d9a74e91c34de49c51d70749b
ERR pmon#psud: :- checkReplyType: Expected to get redis type 3 got type 6, err: BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.

Script fdb_flush.lua takes a long time to execute because it goes through all FDB entries in redis

(This problem is already described in the community bug "[warm-reboot] apps crash due to redis is busy running 'table_dump.lua' during warm-start" sonic-net#3008. It is a generic problem for any type of entry, not only FDB.)

Finally, the fanout logs the following error:
INFO swss#supervisord 2024-10-24 15:23:10,233 INFO exited: orchagent (terminated by SIGABRT (core dumped); not expected)

This causes the Docker containers to restart, and the ports on the DUT also go to the DOWN state for a few seconds.

The FDB test cases already have logic to clean up the FDB entries on the DUT; at the same time, we also need to clean up the FDB on the fanout to avoid this issue.

Update the MAC move test to make it stable on the dualtor aa setup

1. Replace 'show mac' with 'fdbshow' to improve efficiency
2. Increase the FDB check interval to make it more stable on the dualtor aa setup

Change-Id: I5f9e15c69fb3eb4353fca0d504c5e14961e1f675
  • Loading branch information
nhe-NV authored Feb 10, 2025
1 parent 5af51b0 commit 24bcf4b
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
4 changes: 2 additions & 2 deletions tests/fdb/test_fdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,10 +322,10 @@ def setup_active_active_ports(active_active_ports, rand_selected_dut, rand_unsel
@pytest.mark.po2vlan
def test_fdb(ansible_adhoc, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, pkt_type,
toggle_all_simulator_ports_to_rand_selected_tor_m, record_mux_status, # noqa F811
setup_active_active_ports, get_dummay_mac_count): # noqa F811
setup_active_active_ports, get_dummay_mac_count, fanouthosts): # noqa F811

# Perform FDB clean up before each test and at the end of the final test
fdb_cleanup(duthosts, rand_one_dut_hostname)
fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts)
if pkt_type == "cleanup":
return

Expand Down
9 changes: 5 additions & 4 deletions tests/fdb/test_fdb_flush.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,11 @@ def prepareDut(self, request, duthosts, rand_one_dut_hostname):
self.__loadSwssConfig(duthost)
self.__deleteTmpSwitchConfig(duthost)

def prepare_test(self, duthosts, rand_one_dut_hostname):
def prepare_test(self, duthosts, rand_one_dut_hostname, fanouthosts):
logging.info("Start prepare_test")

# Perform FDB clean up before each test
fdb_cleanup(duthosts, rand_one_dut_hostname)
fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts)

duthost = duthosts[rand_one_dut_hostname]

Expand Down Expand Up @@ -342,10 +342,11 @@ def static_fdb_oper(self, duthost, fdb_oper_file):
duthost.shell("docker exec -i swss swssconfig {}".format(fdb_oper_file), module_ignore_errors=True)

@pytest.mark.parametrize("flush_type", FLUSH_TYPES)
def testFdbFlush(self, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, tbinfo, request, flush_type):
def testFdbFlush(self, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, tbinfo, request, flush_type,
fanouthosts):

logging.info("test type {} ".format(flush_type))
self.prepare_test(duthosts, rand_one_dut_hostname)
self.prepare_test(duthosts, rand_one_dut_hostname, fanouthosts)

if "dynamic" == flush_type or "mix" == flush_type:
self.dynamic_fdb_oper(duthosts[rand_one_dut_hostname], tbinfo, ptfhost, 'create')
Expand Down
9 changes: 5 additions & 4 deletions tests/fdb/test_fdb_mac_move.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,10 +56,11 @@ def get_fdb_dict(ptfadapter, vlan_table, dummay_mac_count):
return fdb


def test_fdb_mac_move(ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, get_function_completeness_level,
rotate_syslog):
def test_fdb_mac_move(ptfadapter, duthosts, fanouthosts, rand_one_dut_hostname, ptfhost,
get_function_completeness_level, rotate_syslog):

# Perform FDB clean up before each test
fdb_cleanup(duthosts, rand_one_dut_hostname)
fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts)

normalized_level = get_function_completeness_level
if normalized_level is None:
Expand Down Expand Up @@ -135,6 +136,6 @@ def test_fdb_mac_move(ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, get_
# Flush dataplane
ptfadapter.dataplane.flush()
time.sleep(10)
fdb_cleanup(duthosts, rand_one_dut_hostname)
fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts)
# Wait for 10 seconds before starting next loop
time.sleep(10)
18 changes: 12 additions & 6 deletions tests/fdb/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def get_crm_resources(duthost, resource, status):


def get_fdb_dynamic_mac_count(duthost):
res = duthost.command('show mac')
logger.info('"show mac" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines'])))
res = duthost.command('fdbshow')
logger.info('"fdbshow" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines'])))
total_mac_count = 0
for output_mac in res['stdout_lines']:
if "dynamic" in output_mac.lower() and BASE_MAC_PREFIX in output_mac.lower():
Expand All @@ -51,8 +51,8 @@ def get_fdb_dynamic_mac_count(duthost):


def fdb_table_has_dummy_mac_for_interface(duthost, interface, dummy_mac_prefix=""):
res = duthost.command('show mac')
logger.info('"show mac" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines'])))
res = duthost.command('fdbshow')
logger.info('"fdbshow" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines'])))
for output_mac in res['stdout_lines']:
if (interface in output_mac and (dummy_mac_prefix in output_mac or dummy_mac_prefix == "")):
return True
Expand All @@ -63,14 +63,20 @@ def fdb_table_has_no_dynamic_macs(duthost):
return (get_fdb_dynamic_mac_count(duthost) == 0)


def fdb_cleanup(duthosts, rand_one_dut_hostname):
def fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts={}):
""" cleanup FDB before and after test run """
for fanouthost in fanouthosts.values():
if fanouthost.os == 'sonic':
if fdb_table_has_no_dynamic_macs(fanouthost):
continue
fanouthost.command('sonic-clear fdb all')

duthost = duthosts[rand_one_dut_hostname]
if fdb_table_has_no_dynamic_macs(duthost):
return
else:
duthost.command('sonic-clear fdb all')
pytest_assert(wait_until(100, 2, 0, fdb_table_has_no_dynamic_macs, duthost), "FDB Table Cleanup failed")
pytest_assert(wait_until(100, 5, 0, fdb_table_has_no_dynamic_macs, duthost), "FDB Table Cleanup failed")


def simple_eth_packet(
Expand Down

0 comments on commit 24bcf4b

Please sign in to comment.