From 6c839d357a635f83b6f61a3a2287f83d379cf7d3 Mon Sep 17 00:00:00 2001 From: Wataru Ishida Date: Fri, 14 Jun 2024 02:01:24 +0000 Subject: [PATCH] fix(xcvrd): handle port add/remove events in CmisManagerTask This commit fixes DPB support with CMIS transceivers. CmisManagerTask's `port_dict` must be updated according to the port add/remove events. This commit removes the `port_mapping` field from CmisManagerTask as `port_mapping` was mostly used just for storing `asic_id` information and that can be simply done by `port_dict` instead. Added a helper method `get_asic_id()` method to CmisManagerTask for getting `asic_id` from `logical_port`. fixes https://github.com/sonic-net/sonic-buildimage/issues/18893 Signed-off-by: Wataru Ishida --- sonic-xcvrd/tests/test_xcvrd.py | 359 ++++++++++++++++++++++++++++---- sonic-xcvrd/xcvrd/xcvrd.py | 66 +++--- 2 files changed, 351 insertions(+), 74 deletions(-) diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index 400b72fdc..83e28fe1f 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -189,6 +189,7 @@ def test_CmisManagerTask_task_run_with_exception(self): @patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock()) @patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(False))) @patch('xcvrd.xcvrd.get_cmis_application_desired', MagicMock(side_effect=KeyError)) + @patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True)) @patch('xcvrd.xcvrd.log_exception_traceback') @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @patch('xcvrd.xcvrd.platform_chassis') @@ -211,9 +212,12 @@ def test_CmisManagerTask_get_xcvr_api_exception(self, mock_platform_chassis, moc # Case 1: get_xcvr_api() raises an exception task.on_port_update_event(port_change_event) mock_sfp.get_xcvr_api = MagicMock(side_effect=NotImplementedError) - task.task_worker() + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert mock_log_exception_traceback.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_FAILED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED # Case 2: is_flat_memory() raises AttributeError. In this case, CMIS SM should transition to READY state mock_xcvr_api = MagicMock() @@ -221,21 +225,70 @@ def test_CmisManagerTask_get_xcvr_api_exception(self, mock_platform_chassis, moc mock_xcvr_api.is_flat_memory = MagicMock(side_effect=AttributeError) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.on_port_update_event(port_change_event) - task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY # Case 3: get_cmis_application_desired() raises an exception mock_xcvr_api.is_flat_memory = MagicMock(return_value=False) mock_xcvr_api.is_coherent_module = MagicMock(return_value=False) mock_xcvr_api.get_module_type_abbreviation = MagicMock(return_value='QSFP-DD') - task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.on_port_update_event(port_change_event) task.get_cmis_host_lanes_mask = MagicMock() - task.task_worker() + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) assert mock_log_exception_traceback.call_count == 2 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_FAILED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED assert task.get_cmis_host_lanes_mask.call_count == 0 + # Case 4: get_xcvr_api() returns None + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_sfp.get_xcvr_api.return_value = None + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 5: is_flat_memory() returns True. In this case, CMIS SM should transition to READY state + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = True + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 6: get_module_type_abbreviation() reutrns non-cmis module type + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = False + mock_xcvr_api.get_module_type_abbreviation.return_value = 'SFP' + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + + # Case 7: too many retries + task.on_port_update_event(port_change_event) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + mock_xcvr_api = MagicMock() + mock_xcvr_api.is_flat_memory.return_value = False + mock_xcvr_api.get_module_type_abbreviation.return_value = 'QSFP-DD' + mock_xcvr_api.is_coherent_module.return_value = False + mock_sfp.get_xcvr_api.return_value = mock_xcvr_api + assert len(task.port_dict) == 1 + for lport, info in task.port_dict.items(): + info["cmis_retries"] = task.CMIS_MAX_RETRIES + 1 + task.handle_cmis_state_machine(lport, info, False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED + @patch('xcvrd.xcvrd_utilities.port_event_helper.subscribe_port_config_change', MagicMock(side_effect = NotImplementedError)) def test_DomInfoUpdateTask_task_run_with_exception(self): port_mapping = PortMapping() @@ -296,7 +349,7 @@ def test_SfpStateUpdateTask_task_run_with_exception(self): @patch('xcvrd.xcvrd.SffManagerTask.join') def test_DaemonXcvrd_run_with_exception(self, mock_task_join_sff, mock_task_join_sfp, mock_task_join_dom, mock_init, mock_os_kill): - mock_init.return_value = (PortMapping(), set()) + mock_init.return_value = PortMapping() xcvrd = DaemonXcvrd(SYSLOG_IDENTIFIER) xcvrd.enable_sff_mgr = True xcvrd.load_feature_flags = MagicMock() @@ -1198,7 +1251,7 @@ def test_DaemonXcvrd_wait_for_port_config_done(self, mock_select, mock_sub_table @patch('xcvrd.xcvrd.DomInfoUpdateTask.join') @patch('xcvrd.xcvrd.SfpStateUpdateTask.join') def test_DaemonXcvrd_run(self, mock_task_stop1, mock_task_stop2, mock_task_run1, mock_task_run2, mock_deinit, mock_init): - mock_init.return_value = (PortMapping(), set()) + mock_init.return_value = PortMapping() xcvrd = DaemonXcvrd(SYSLOG_IDENTIFIER) xcvrd.load_feature_flags = MagicMock() xcvrd.stop_event.wait = MagicMock() @@ -1480,22 +1533,97 @@ def test_CmisManagerTask_handle_port_change_event(self): task.on_port_update_event(port_change_event) assert task.isPortConfigDone + assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # CmisManagerTask doesn't handle PORT_ADD event. + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # CmisManagerTask doesn't handle PORT_REMOVE event. + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_REMOVE) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 + # PORT_DEL event before handling PORT_SET + # No change on port_dict port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL) task.on_port_update_event(port_change_event) - assert len(task.port_dict) == 1 + assert len(task.port_dict) == 0 + assert len(task.deleted_ports) == 0 port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 0 + + # PORT_DEL event after handling PORT_SET + # No change on port_dict + # deleted_ports should have been updated + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_DEL) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 1 + + @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_state_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_cfg_port_tbl') + @patch('xcvrd.xcvrd.platform_chassis') + def test_CmisManagerTask_do_task(self, mock_chassis, mock_get_cfg_port_tbl, mock_get_state_port_tbl, mock_get_status_tbl): + mock_get_status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + mock_get_state_port_tbl = Table("STATE_DB", "PORT_TABLE") + mock_get_cfg_port_tbl.get.return_value = (True, {"admin_status": "up"}) + mock_chassis.get_sfp.return_value.get_presence.return_value = False # sfp not present + + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl + task.xcvr_table_helper.get_state_port_tbl.return_value = mock_get_state_port_tbl + task.xcvr_table_helper.get_cfg_port_tbl.return_value = mock_get_cfg_port_tbl + + assert not task.isPortConfigDone + port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert task.isPortConfigDone + assert len(task.port_dict) == 0 + + # empty lanes + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": ""}) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + task.do_task(False) + + # CMIS state doesn't change + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": "1", "speed": "1000000"}) + task.on_port_update_event(port_change_event) + task.do_task(False) + + # CMIS state goes to REMOVED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_REMOVED + mock_chassis.get_sfp.return_value.get_presence.return_value = True # sfp present + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {"lanes": "1", "speed": "1000000"}) + task.on_port_update_event(port_change_event) + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY + port_change_event = PortChangeEvent('Ethernet0', -1, 0, PortChangeEvent.PORT_DEL) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + assert len(task.deleted_ports) == 1 + task.do_task(False) + assert len(task.deleted_ports) == 0 + assert len(task.port_dict) == 0 @patch('xcvrd.xcvrd.XcvrTableHelper') def test_CmisManagerTask_get_configured_freq(self, mock_table_helper): @@ -1519,6 +1647,51 @@ def test_CmisManagerTask_get_configured_tx_power_from_db(self, mock_table_helper task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl assert task.get_configured_tx_power_from_db('Ethernet0') == -10 + @patch('xcvrd.xcvrd.XcvrTableHelper') + def test_CmisManagerTask_get_host_tx_status(self, mock_table_helper): + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + state_port_tbl = MagicMock() + state_port_tbl.get = MagicMock(return_value=(False, None)) + mock_table_helper.get_state_port_tbl = MagicMock(return_value=state_port_tbl) + task.xcvr_table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl + assert task.get_host_tx_status('Ethernet0') == 'false' + state_port_tbl = MagicMock() + state_port_tbl.get = MagicMock(return_value=(True, (('host_tx_ready', 'true'),))) + mock_table_helper.get_state_port_tbl = MagicMock(return_value=state_port_tbl) + task.xcvr_table_helper.get_state_port_tbl = mock_table_helper.get_state_port_tbl + assert task.get_host_tx_status('Ethernet0') == 'true' + + @patch('xcvrd.xcvrd.XcvrTableHelper') + def test_CmisManagerTask_get_port_admin_status(self, mock_table_helper): + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + cfg_port_tbl = MagicMock() + cfg_port_tbl.get = MagicMock(return_value=(False, None)) + mock_table_helper.get_cfg_port_tbl = MagicMock(return_value=cfg_port_tbl) + task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl + assert task.get_port_admin_status('Ethernet0') == 'down' + cfg_port_tbl = MagicMock() + cfg_port_tbl.get = MagicMock(return_value=(True, (('admin_status', 'up'),))) + mock_table_helper.get_cfg_port_tbl = MagicMock(return_value=cfg_port_tbl) + task.xcvr_table_helper.get_cfg_port_tbl = mock_table_helper.get_cfg_port_tbl + assert task.get_port_admin_status('Ethernet0') == 'up' + + def test_CmisManagerTask_configre_tx_output_power(self): + mock_xcvr_api = MagicMock() + mock_xcvr_api.get_supported_power_config = MagicMock(return_value=(-10, 4)) + mock_xcvr_api.set_tx_power = MagicMock(return_value=1) + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', 0) == 1 + # setting power out of range is currently not an error + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', -20) == 1 + # setting power out of range is currently not an error + assert task.configure_tx_output_power(mock_xcvr_api, 'Ethernet0', 10) == 1 + @patch('xcvrd.xcvrd.platform_chassis') @patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(False))) @patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock())) @@ -1890,24 +2063,33 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert len(task.port_dict) == 1 + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone + port_change_event = PortChangeEvent('PortInitDone', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert task.isPortInitDone + + # non-physical port is silently ignored + port_change_event = PortChangeEvent('PortChannel1', -1, 0, PortChangeEvent.PORT_SET) + task.on_port_update_event(port_change_event) + assert len(task.port_dict) == 1 + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='true') task.get_port_admin_status = MagicMock(return_value='up') @@ -1920,51 +2102,50 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): task.is_appl_reconfigure_required = MagicMock(return_value=True) mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=True) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_DEINIT + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_datapath_deinit.call_count == 1 assert mock_xcvr_api.tx_disable_channel.call_count == 1 assert mock_xcvr_api.set_lpmode.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_AP_CONF + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF # Case 2: DP_DEINIT --> AP Configured task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_application.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_INIT + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_INIT # Case 3: AP Configured --> DP_INIT task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.set_datapath_init.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_TXON + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_TXON # Case 4: DP_INIT --> DP_TXON task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.tx_disable_channel.call_count == 2 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_DP_ACTIVATE + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_ACTIVATE # Case 5: DP_TXON --> DP_ACTIVATION - task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.post_port_active_apsel_to_db = MagicMock() - task.task_worker() + task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) + task.do_task(False) assert task.post_port_active_apsel_to_db.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY # Fail test coverage - Module Inserted state failing to reach DP_DEINIT port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet1', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet1'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -1973,7 +2154,7 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='true') task.get_port_admin_status = MagicMock(return_value='up') @@ -1986,8 +2167,98 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl): task.is_appl_reconfigure_required = MagicMock(return_value=True) mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=False) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() - assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet1'))) == CMIS_STATE_DP_DEINIT + task.do_task(False) + assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_DP_DEINIT + + @patch('xcvrd.xcvrd_utilities.optics_si_parser.g_optics_si_dict', optics_si_settings_dict) + @patch('xcvrd.xcvrd.platform_chassis') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_cfg_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_state_port_tbl') + @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') + def test_CmisManagerTask_do_task_with_optics_si(self, mock_get_status_tbl, mock_get_state_port_tbl, mock_get_cfg_port_tbl, mock_chassis): + mock_get_status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE) + mock_get_state_port_tbl = Table("STATE_DB", "PORT_TABLE") + mock_get_cfg_port_tbl.get.return_value = (True, {"admin_status": "up"}) + mock_sfp = MagicMock() + mock_sfp.get_presence.return_value = True # sfp present + mock_api = MagicMock() + mock_api.is_flat_memory.return_value = False + mock_api.get_module_type_abbreviation.return_value = 'QSFP-DD' + mock_api.is_coherent_module.return_value = False + + mock_sfp.get_xcvr_api.return_value = mock_api + mock_chassis.get_sfp.return_value = mock_sfp + + port_mapping = PortMapping() + stop_event = threading.Event() + task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) + task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl + task.xcvr_table_helper.get_state_port_tbl.return_value = mock_get_state_port_tbl + task.xcvr_table_helper.get_cfg_port_tbl.return_value = mock_get_cfg_port_tbl + + port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_SET, + {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8', 'host_tx_ready': 'true'}) + task.on_port_update_event(port_change_event) + # forcing the state to be CMIS_STATE_DEINIT + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_DP_DEINIT) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT + task.port_dict['Ethernet0']['cmis_retries'] = 0 + task.port_dict['Ethernet0']['host_lanes_mask'] = 0xff + task.port_dict['Ethernet0']['media_lanes_mask'] = 0xff + task.port_dict['Ethernet0']['appl'] = 1 + + mock_api.tx_disable_channel.return_value = False + task.do_task(False) + assert task.port_dict['Ethernet0'].get('cmis_retries') == 1 + assert task.port_dict['Ethernet0'].get('cmis_expired') == None + + mock_api.tx_disable_channel.return_value = True + mock_api.get_datapath_deinit_duration.return_value = 1000 + mock_api.get_module_pwr_up_duration.return_value = 1000 + task.do_task(False) + assert task.port_dict['Ethernet0'].get('cmis_retries') == 1 + assert task.port_dict['Ethernet0'].get('cmis_expired') != None + + mock_api.get_module_state.return_value = 'Test' + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + + # set expired time explicitly + task.port_dict['Ethernet0']['cmis_expired'] = datetime.datetime.now() + + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + # forcing the state to be CMIS_STATE_AP_CONF + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_AP_CONF) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + mock_api.get_module_state.return_value = 'ModuleReady' + mock_api.get_datapath_state.return_value = {f"DP{i}State": 'Test' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + + assert task.check_datapath_state(mock_api, 0xff, ['DataPathDeactivated']) == False + + # set expired time explicitly + task.port_dict['Ethernet0']['cmis_expired'] = datetime.datetime.now() + + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED + + # forcing the state to be CMIS_STATE_AP_CONF + task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_AP_CONF) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF + mock_api.get_datapath_state.return_value = {f"DP{i}State": 'DataPathDeactivated' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + + assert task.check_datapath_state(mock_api, 0xff, ['DataPathDeactivated']) + task.do_task(False) + + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_INIT + + mock_api.get_config_datapath_hostlane_status.return_value = {f"ConfigStatusLane{i}": 'ConfigSuccess' for i in range(1, task.CMIS_MAX_HOST_LANES + 1)} + assert task.check_config_error(mock_api, 0xff, ['ConfigSuccess']) + mock_api.get_datapath_init_duration.return_value = 1000 + + task.do_task(False) + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_TXON @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @patch('xcvrd.xcvrd.platform_chassis') @@ -2092,15 +2363,14 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -2109,7 +2379,7 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='false') task.get_port_admin_status = MagicMock(return_value='up') @@ -2119,9 +2389,9 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu task.configure_laser_frequency = MagicMock(return_value=1) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY @patch('xcvrd.xcvrd.XcvrTableHelper.get_status_tbl') @@ -2227,15 +2497,14 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc mock_chassis.get_sfp = MagicMock(return_value=mock_sfp) port_mapping = PortMapping() + port_mapping.handle_port_change_event(PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD)) stop_event = threading.Event() task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event) - task.port_mapping.logical_port_list = ['Ethernet0'] task.xcvr_table_helper.get_status_tbl.return_value = mock_get_status_tbl task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) task.task_worker() - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_UNKNOWN + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_UNKNOWN - task.port_mapping.logical_port_list = MagicMock() port_change_event = PortChangeEvent('PortConfigDone', -1, 0, PortChangeEvent.PORT_SET) task.on_port_update_event(port_change_event) assert task.isPortConfigDone @@ -2244,7 +2513,7 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc {'speed':'400000', 'lanes':'1,2,3,4,5,6,7,8'}) task.on_port_update_event(port_change_event) assert len(task.port_dict) == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_INSERTED + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED task.get_host_tx_status = MagicMock(return_value='false') task.get_port_admin_status = MagicMock(return_value='up') @@ -2254,10 +2523,10 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false(self, mock_chassis, moc task.configure_laser_frequency = MagicMock(return_value=1) task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True]) - task.task_worker() + task.do_task(False) assert mock_xcvr_api.tx_disable_channel.call_count == 1 - assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.port_mapping.get_asic_id_for_logical_port('Ethernet0'))) == CMIS_STATE_READY + assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_READY @pytest.mark.parametrize("lport, expected_dom_polling", [ ('Ethernet0', 'disabled'), diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index f8130df20..396c0aa67 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -817,8 +817,8 @@ def __init__(self, namespaces, port_mapping, main_thread_stop_event, skip_cmis_m self.exc = None self.task_stopping_event = threading.Event() self.main_thread_stop_event = main_thread_stop_event - self.port_dict = {} - self.port_mapping = copy.deepcopy(port_mapping) + self.port_dict = {k: {"asic_id": v} for k, v in port_mapping.logical_to_asic.items()} + self.deleted_ports = {} self.xcvr_table_helper = XcvrTableHelper(namespaces) self.isPortInitDone = False self.isPortConfigDone = False @@ -834,9 +834,11 @@ def log_notice(self, message): def log_error(self, message): helper_logger.log_error("CMIS: {}".format(message)) + def get_asic_id(self, lport): + return self.port_dict.get(lport, {}).get("asic_id", -1) + def update_port_transceiver_status_table_sw_cmis_state(self, lport, cmis_state_to_set): - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - status_table = self.xcvr_table_helper.get_status_tbl(asic_index) + status_table = self.xcvr_table_helper.get_status_tbl(self.get_asic_id(lport)) if status_table is None: helper_logger.log_error("status_table is None while updating " "sw CMIS state for lport {}".format(lport)) @@ -850,6 +852,7 @@ def on_port_update_event(self, port_change_event): return lport = port_change_event.port_name + # 'index' can be -1 if STATE_DB|PORT_TABLE pport = port_change_event.port_index if lport in ['PortInitDone']: @@ -868,15 +871,13 @@ def on_port_update_event(self, port_change_event): if pport is None: return - # Skip if the port/cage type is not a CMIS - # 'index' can be -1 if STATE_DB|PORT_TABLE - if lport not in self.port_dict: - self.port_dict[lport] = {} + if port_change_event.event_type == port_change_event.PORT_SET: + if lport not in self.port_dict: + self.port_dict[lport] = {"asic_id": port_change_event.asic_id} - if port_change_event.port_dict is None: - return + if port_change_event.port_dict is None: + return - if port_change_event.event_type == port_change_event.PORT_SET: if pport >= 0: self.port_dict[lport]['index'] = pport if 'speed' in port_change_event.port_dict and port_change_event.port_dict['speed'] != 'N/A': @@ -896,7 +897,13 @@ def on_port_update_event(self, port_change_event): self.force_cmis_reinit(lport, 0) else: + # PORT_DEL event for the same lport happens 3 times because + # we are subscribing to CONFIG_DB, STATE_DB|TRANSCEIVER_INFO, and STATE_DB|PORT_TABLE. + # We only handle the first one and ignore the rest. + if lport in self.deleted_ports or lport not in self.port_dict: + return self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_REMOVED) + self.deleted_ports[lport] = port_change_event def get_cmis_dp_init_duration_secs(self, api): return api.get_datapath_init_duration()/1000 @@ -1160,8 +1167,7 @@ def get_configured_laser_freq_from_db(self, lport): Return the Tx power configured by user in CONFIG_DB's PORT table """ freq = 0 - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = port_tbl.get(lport) if found and 'laser_freq' in dict(port_info): @@ -1173,8 +1179,7 @@ def get_configured_tx_power_from_db(self, lport): Return the Tx power configured by user in CONFIG_DB's PORT table """ power = 0 - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = port_tbl.get(lport) if found and 'tx_power' in dict(port_info): @@ -1184,8 +1189,7 @@ def get_configured_tx_power_from_db(self, lport): def get_host_tx_status(self, lport): host_tx_ready = 'false' - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - state_port_tbl = self.xcvr_table_helper.get_state_port_tbl(asic_index) + state_port_tbl = self.xcvr_table_helper.get_state_port_tbl(self.get_asic_id(lport)) found, port_info = state_port_tbl.get(lport) if found and 'host_tx_ready' in dict(port_info): @@ -1195,8 +1199,7 @@ def get_host_tx_status(self, lport): def get_port_admin_status(self, lport): admin_status = 'down' - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - cfg_port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(asic_index) + cfg_port_tbl = self.xcvr_table_helper.get_cfg_port_tbl(self.get_asic_id(lport)) found, port_info = cfg_port_tbl.get(lport) if found: @@ -1266,8 +1269,7 @@ def post_port_active_apsel_to_db(self, api, lport, host_lanes_mask): media_lane_count = appl_advt_act.get('media_lane_count', 'N/A') if appl_advt_act else 'N/A' tuple_list.append(('media_lane_count', str(media_lane_count))) - asic_index = self.port_mapping.get_asic_id_for_logical_port(lport) - intf_tbl = self.xcvr_table_helper.get_intf_tbl(asic_index) + intf_tbl = self.xcvr_table_helper.get_intf_tbl(self.get_asic_id(lport)) if not intf_tbl: helper_logger.log_warning("Active ApSel db update: TRANSCEIVER_INFO table not found for {}".format(lport)) return @@ -1301,7 +1303,7 @@ def wait_for_port_config_done(self, namespace): break def handle_cmis_state_machine(self, lport, info, is_fast_reboot): - state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.port_mapping.get_asic_id_for_logical_port(lport))) + state = get_cmis_state_from_state_db(lport, self.xcvr_table_helper.get_status_tbl(self.get_asic_id(lport))) if state in CMIS_TERMINAL_STATES or state == CMIS_STATE_UNKNOWN: if state != CMIS_STATE_READY: self.port_dict[lport]['appl'] = 0 @@ -1621,6 +1623,17 @@ def handle_cmis_state_machine(self, lport, info, is_fast_reboot): log_exception_traceback() self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_FAILED) + def do_task(self, is_fast_reboot): + for lport, info in self.port_dict.items(): + if self.task_stopping_event.is_set(): + break + + self.handle_cmis_state_machine(lport, info, is_fast_reboot) + + for event in self.deleted_ports.values(): + self.port_dict.pop(event.port_name) + + self.deleted_ports = {} def task_worker(self): self.xcvr_table_helper = XcvrTableHelper(self.namespaces) @@ -1629,8 +1642,7 @@ def task_worker(self): for namespace in self.namespaces: self.wait_for_port_config_done(namespace) - logical_port_list = self.port_mapping.logical_port_list - for lport in logical_port_list: + for lport in self.port_dict.keys(): self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_UNKNOWN) is_fast_reboot = is_fast_reboot_enabled() @@ -1643,11 +1655,7 @@ def task_worker(self): while not self.task_stopping_event.is_set(): # Handle port change event from main thread port_change_observer.handle_port_update_event() - - for lport, info in self.port_dict.items(): - if self.task_stopping_event.is_set(): - break - self.handle_cmis_state_machine(lport, info, is_fast_reboot) + self.do_task(is_fast_reboot) self.log_notice("Stopped")