From 24bcf4be328f75fb6987ffe9f5502c8b0c523b2e Mon Sep 17 00:00:00 2001
From: "Nana@Nvidia" <78413612+nhe-NV@users.noreply.github.com>
Date: Tue, 11 Feb 2025 00:58:27 +0800
Subject: [PATCH] Clean up the fdb on the sonic fanout in fdb test (#15878)

The fanout switch should not learn any FDB entries at all, but it learns tens of thousands of them. The FDB table on the fanout is much bigger than the FDB table on the DUT. Replace 'show mac' with 'fdbshow' to improve efficiency.

In this case, when a port is shut down from the DUT, the corresponding port on the fanout also goes down. An FDB flush is then executed on this port on the fanout; if there are too many FDB entries on the port, the following events occur on the fanout during the flush:

INFO database#supervisord: redis 40:M 24 Oct 2024 15:23:08.510 #Lua slow script detected: still in execution after 6524 milliseconds. You can try killing the script using the SCRIPT KILL command. Script SHA1 is: 7acccfabe7fbd17d9a74e91c34de49c51d70749b
ERR pmon#psud: :- checkReplyType: Expected to get redis type 3 got type 6, err: BUSY Redis is busy running a script. You can only call SCRIPT KILL or SHUTDOWN NOSAVE.

The script fdb_flush.lua takes a long time to execute because it goes through all FDB entries in redis. (This problem is already described in the community bug "[warm-reboot] apps crash due to redis is busy running 'table_dump.lua' during warm-start" #3008. It is a generic problem for any type of entry, not only FDB.)

Finally, the fanout reports the following error:

INFO swss#supervisord 2024-10-24 15:23:10,233 INFO exited: orchagent (terminated by SIGABRT (core dumped); not expected)

This causes the Docker containers to restart, and the ports on the DUT also go DOWN for a few seconds. The fdb test case already has logic to clean up the FDB entries on the DUT; we also need to clean up the FDB on the fanout at the same time to avoid this issue.
Update mac move test to make it stable at dualtor aa setup 1.Replace 'show mac' with 'fdbshow' to improve the efficiency 2.Increase the fdb check interval to make it more stable at dualtor aa setup Change-Id: I5f9e15c69fb3eb4353fca0d504c5e14961e1f675 --- tests/fdb/test_fdb.py | 4 ++-- tests/fdb/test_fdb_flush.py | 9 +++++---- tests/fdb/test_fdb_mac_move.py | 9 +++++---- tests/fdb/utils.py | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 16 deletions(-) diff --git a/tests/fdb/test_fdb.py b/tests/fdb/test_fdb.py index 7fd165aeab3..af268760833 100644 --- a/tests/fdb/test_fdb.py +++ b/tests/fdb/test_fdb.py @@ -322,10 +322,10 @@ def setup_active_active_ports(active_active_ports, rand_selected_dut, rand_unsel @pytest.mark.po2vlan def test_fdb(ansible_adhoc, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, pkt_type, toggle_all_simulator_ports_to_rand_selected_tor_m, record_mux_status, # noqa F811 - setup_active_active_ports, get_dummay_mac_count): # noqa F811 + setup_active_active_ports, get_dummay_mac_count, fanouthosts): # noqa F811 # Perform FDB clean up before each test and at the end of the final test - fdb_cleanup(duthosts, rand_one_dut_hostname) + fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts) if pkt_type == "cleanup": return diff --git a/tests/fdb/test_fdb_flush.py b/tests/fdb/test_fdb_flush.py index 82594dcf620..08832b574e0 100644 --- a/tests/fdb/test_fdb_flush.py +++ b/tests/fdb/test_fdb_flush.py @@ -211,11 +211,11 @@ def prepareDut(self, request, duthosts, rand_one_dut_hostname): self.__loadSwssConfig(duthost) self.__deleteTmpSwitchConfig(duthost) - def prepare_test(self, duthosts, rand_one_dut_hostname): + def prepare_test(self, duthosts, rand_one_dut_hostname, fanouthosts): logging.info("Start prepare_test") # Perform FDB clean up before each test - fdb_cleanup(duthosts, rand_one_dut_hostname) + fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts) duthost = duthosts[rand_one_dut_hostname] @@ -342,10 +342,11 @@ def 
static_fdb_oper(self, duthost, fdb_oper_file): duthost.shell("docker exec -i swss swssconfig {}".format(fdb_oper_file), module_ignore_errors=True) @pytest.mark.parametrize("flush_type", FLUSH_TYPES) - def testFdbFlush(self, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, tbinfo, request, flush_type): + def testFdbFlush(self, ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, tbinfo, request, flush_type, + fanouthosts): logging.info("test type {} ".format(flush_type)) - self.prepare_test(duthosts, rand_one_dut_hostname) + self.prepare_test(duthosts, rand_one_dut_hostname, fanouthosts) if "dynamic" == flush_type or "mix" == flush_type: self.dynamic_fdb_oper(duthosts[rand_one_dut_hostname], tbinfo, ptfhost, 'create') diff --git a/tests/fdb/test_fdb_mac_move.py b/tests/fdb/test_fdb_mac_move.py index 24d56f2998c..e57bca62e60 100644 --- a/tests/fdb/test_fdb_mac_move.py +++ b/tests/fdb/test_fdb_mac_move.py @@ -56,10 +56,11 @@ def get_fdb_dict(ptfadapter, vlan_table, dummay_mac_count): return fdb -def test_fdb_mac_move(ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, get_function_completeness_level, - rotate_syslog): +def test_fdb_mac_move(ptfadapter, duthosts, fanouthosts, rand_one_dut_hostname, ptfhost, + get_function_completeness_level, rotate_syslog): + # Perform FDB clean up before each test - fdb_cleanup(duthosts, rand_one_dut_hostname) + fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts) normalized_level = get_function_completeness_level if normalized_level is None: @@ -135,6 +136,6 @@ def test_fdb_mac_move(ptfadapter, duthosts, rand_one_dut_hostname, ptfhost, get_ # Flush dataplane ptfadapter.dataplane.flush() time.sleep(10) - fdb_cleanup(duthosts, rand_one_dut_hostname) + fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts) # Wait for 10 seconds before starting next loop time.sleep(10) diff --git a/tests/fdb/utils.py b/tests/fdb/utils.py index 877ec74b1ea..f5495cf603f 100644 --- a/tests/fdb/utils.py +++ b/tests/fdb/utils.py @@ -41,8 +41,8 
@@ def get_crm_resources(duthost, resource, status): def get_fdb_dynamic_mac_count(duthost): - res = duthost.command('show mac') - logger.info('"show mac" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) + res = duthost.command('fdbshow') + logger.info('"fdbshow" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) total_mac_count = 0 for output_mac in res['stdout_lines']: if "dynamic" in output_mac.lower() and BASE_MAC_PREFIX in output_mac.lower(): @@ -51,8 +51,8 @@ def get_fdb_dynamic_mac_count(duthost): def fdb_table_has_dummy_mac_for_interface(duthost, interface, dummy_mac_prefix=""): - res = duthost.command('show mac') - logger.info('"show mac" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) + res = duthost.command('fdbshow') + logger.info('"fdbshow" output on DUT:\n{}'.format(pprint.pformat(res['stdout_lines']))) for output_mac in res['stdout_lines']: if (interface in output_mac and (dummy_mac_prefix in output_mac or dummy_mac_prefix == "")): return True @@ -63,14 +63,20 @@ def fdb_table_has_no_dynamic_macs(duthost): return (get_fdb_dynamic_mac_count(duthost) == 0) -def fdb_cleanup(duthosts, rand_one_dut_hostname): +def fdb_cleanup(duthosts, rand_one_dut_hostname, fanouthosts={}): """ cleanup FDB before and after test run """ + for fanouthost in fanouthosts.values(): + if fanouthost.os == 'sonic': + if fdb_table_has_no_dynamic_macs(fanouthost): + continue + fanouthost.command('sonic-clear fdb all') + duthost = duthosts[rand_one_dut_hostname] if fdb_table_has_no_dynamic_macs(duthost): return else: duthost.command('sonic-clear fdb all') - pytest_assert(wait_until(100, 2, 0, fdb_table_has_no_dynamic_macs, duthost), "FDB Table Cleanup failed") + pytest_assert(wait_until(100, 5, 0, fdb_table_has_no_dynamic_macs, duthost), "FDB Table Cleanup failed") def simple_eth_packet(