From 0431fa3b925f22f3d014a56e77d0004d3d0cd42b Mon Sep 17 00:00:00 2001 From: Gagan Punathil Ellath Date: Tue, 26 Nov 2024 19:32:30 -0800 Subject: [PATCH] Addition of DPU Chassis for thermalctld (#564) --- sonic-thermalctld/scripts/thermalctld | 15 +++-- sonic-thermalctld/tests/mock_platform.py | 23 +++++++ sonic-thermalctld/tests/test_thermalctld.py | 68 +++++++++++++++++++++ 3 files changed, 100 insertions(+), 6 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 82d64a105..2739bd926 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -26,7 +26,7 @@ SYSLOG_IDENTIFIER = 'thermalctld' NOT_AVAILABLE = 'N/A' CHASSIS_INFO_KEY = 'chassis 1' PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO' -INVALID_SLOT = -1 +INVALID_SLOT_OR_DPU = -1 ERR_UNKNOWN = 1 @@ -523,9 +523,11 @@ class TemperatureUpdater(logger.Logger): self.all_thermals = set() self.is_chassis_system = chassis.is_modular_chassis() - if self.is_chassis_system: - my_slot = try_get(chassis.get_my_slot, INVALID_SLOT) - if my_slot != INVALID_SLOT: + self.is_smartswitch_dpu = chassis.is_smartswitch() and chassis.is_dpu() + self.is_chassis_upd_required = self.is_chassis_system or self.is_smartswitch_dpu + if self.is_chassis_upd_required: + my_slot = try_get(chassis.get_my_slot if self.is_chassis_system else chassis.get_dpu_id, INVALID_SLOT_OR_DPU) + if my_slot != INVALID_SLOT_OR_DPU: try: # Modular chassis does not have to have table CHASSIS_STATE_DB. # So catch the exception here and ignore it. 
@@ -540,7 +542,7 @@ class TemperatureUpdater(logger.Logger): table_keys = self.table.getKeys() for tk in table_keys: self.table._del(tk) - if self.is_chassis_system and self.chassis_table is not None: + if self.is_chassis_upd_required and self.chassis_table is not None: self.chassis_table._del(tk) if self.phy_entity_table: phy_entity_keys = self.phy_entity_table.getKeys() @@ -593,6 +595,7 @@ class TemperatureUpdater(logger.Logger): available_thermals.add((thermal, parent_name, thermal_index)) self._refresh_temperature_status(parent_name, thermal, thermal_index) + # As there are no modules present in DPU, this IF condition is not updated to consider DPU chassis if self.is_chassis_system: for module_index, module in enumerate(self.chassis.get_all_modules()): module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1)) @@ -702,7 +705,7 @@ class TemperatureUpdater(logger.Logger): ]) self.table.set(name, fvs) - if self.is_chassis_system and self.chassis_table is not None: + if self.is_chassis_upd_required and self.chassis_table is not None: self.chassis_table.set(name, fvs) except Exception as e: self.log_warning('Failed to update thermal status for {} - {}'.format(name, repr(e))) diff --git a/sonic-thermalctld/tests/mock_platform.py b/sonic-thermalctld/tests/mock_platform.py index 038347e48..1b7a43039 100644 --- a/sonic-thermalctld/tests/mock_platform.py +++ b/sonic-thermalctld/tests/mock_platform.py @@ -335,7 +335,10 @@ def __init__(self): self._replaceable = False self._is_chassis_system = False + self._is_dpu = False + self._is_smartswitch = False self._my_slot = module_base.ModuleBase.MODULE_INVALID_SLOT + self._dpu_id = None self._thermal_manager = MockThermalManager() def make_absent_fan(self): @@ -445,6 +448,26 @@ def get_position_in_parent(self): def is_replaceable(self): return self._replaceable + def is_dpu(self): + return self._is_dpu + + def is_smartswitch(self): + return self._is_smartswitch + + def set_smartswitch(self, is_true): + 
self._is_smartswitch = is_true + + def set_dpu(self, is_true): + self._is_dpu = is_true + + def set_dpu_id(self, dpu_id): + self._dpu_id = dpu_id + + def get_dpu_id(self): + # Default behaviour (no DPU id set) is to raise NotImplementedError; compare with None so a DPU id of 0 is still returned + if self._dpu_id is None: + raise NotImplementedError + return self._dpu_id class MockModule(module_base.ModuleBase): def __init__(self): diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index 151b72fb7..9f16b4111 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -508,6 +508,74 @@ def test_update_module_thermals(self): assert len(temperature_updater.all_thermals) == 0 +# DPU chassis-related tests +def test_dpu_chassis_thermals(): + chassis = MockChassis() + # Modular chassis (Not a dpu chassis) No Change in TemperatureUpdater Behaviour + chassis.set_modular_chassis(True) + chassis.set_my_slot(1) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + assert temperature_updater.chassis_table + # DPU chassis TemperatureUpdater with is_smartswitch returning False - No update to CHASSIS_STATE_DB + chassis.set_modular_chassis(False) + chassis.set_dpu(True) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + assert not temperature_updater.chassis_table + # DPU chassis TemperatureUpdater without get_dpu_id implementation - No update to CHASSIS_STATE_DB + chassis.set_smartswitch(True) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + assert not temperature_updater.chassis_table + # DPU chassis TemperatureUpdater with get_dpu_id implemented - Update data to CHASSIS_STATE_DB + dpu_id = 1 + chassis.set_dpu_id(dpu_id) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + assert temperature_updater.chassis_table + # Table name in chassis state db is TEMPER_INFO_TABLE_NAME suffixed with _{dpu_id} + assert 
temperature_updater.chassis_table.table_name == f"{TEMPER_INFO_TABLE_NAME}_{dpu_id}" + temperature_updater.table = Table("STATE_DB", "xtable") + temperature_updater.table._del = mock.MagicMock() + + +def test_dpu_chassis_state_deinit(): + # Confirm that the chassis_table entries for DPU Chassis are removed on deletion + chassis = MockChassis() + chassis.set_smartswitch(True) + chassis.set_modular_chassis(False) + chassis.set_dpu(True) + chassis.set_dpu_id(1) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + assert temperature_updater.chassis_table + temperature_updater.table = Table("STATE_DB", "xtable") + temperature_updater.phy_entity_table = None + temperature_updater.table.getKeys = mock.MagicMock(return_value=['key1', 'key2']) + temperature_updater.table._del = mock.MagicMock() + temperature_updater.chassis_table = Table("CHASSIS_STATE_DB", "ctable") + temperature_updater.chassis_table._del = mock.MagicMock() + temperature_updater.__del__() + assert temperature_updater.chassis_table._del.call_count == 2 + expected_calls = [mock.call('key1'), mock.call('key2')] + temperature_updater.chassis_table._del.assert_has_calls(expected_calls, any_order=True) + + +def test_updater_dpu_thermal_check_chassis_table(): + chassis = MockChassis() + + thermal1 = MockThermal() + chassis.get_all_thermals().append(thermal1) + + chassis.set_dpu(True) + chassis.set_smartswitch(True) + chassis.set_dpu_id(1) + temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) + temperature_updater.update() + assert temperature_updater.chassis_table.get_size() == chassis.get_num_thermals() + + thermal2 = MockThermal() + chassis.get_all_thermals().append(thermal2) + temperature_updater.update() + assert temperature_updater.chassis_table.get_size() == chassis.get_num_thermals() + + # Modular chassis-related tests