From e8faab0b81201561da168c17a512269e1ae4ea88 Mon Sep 17 00:00:00 2001 From: Gregory Boudreau <45526465+gregoryboudreau@users.noreply.github.com> Date: Thu, 7 Dec 2023 13:38:09 -0800 Subject: [PATCH] cherrypicked into 202205 branch (#409) --- sonic-thermalctld/scripts/thermalctld | 41 +++++++++++---------- sonic-thermalctld/tests/mock_platform.py | 3 ++ sonic-thermalctld/tests/test_thermalctld.py | 6 +-- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/sonic-thermalctld/scripts/thermalctld b/sonic-thermalctld/scripts/thermalctld index 09b573c28..83ac9d67f 100644 --- a/sonic-thermalctld/scripts/thermalctld +++ b/sonic-thermalctld/scripts/thermalctld @@ -541,10 +541,10 @@ class TemperatureUpdater(logger.Logger): self.table = swsscommon.Table(state_db, TemperatureUpdater.TEMPER_INFO_TABLE_NAME) self.phy_entity_table = swsscommon.Table(state_db, PHYSICAL_ENTITY_INFO_TABLE) self.chassis_table = None + self.all_thermals = set() self.is_chassis_system = chassis.is_modular_chassis() if self.is_chassis_system: - self.module_thermals = set() my_slot = try_get(chassis.get_my_slot, INVALID_SLOT) if my_slot != INVALID_SLOT: try: @@ -587,19 +587,23 @@ class TemperatureUpdater(logger.Logger): :return: """ self.log_debug("Start temperature updating") + available_thermals = set() for index, thermal in enumerate(self.chassis.get_all_thermals()): if self.task_stopping_event.is_set(): return + available_thermals.add((thermal, CHASSIS_INFO_KEY, index)) self._refresh_temperature_status(CHASSIS_INFO_KEY, thermal, index) for psu_index, psu in enumerate(self.chassis.get_all_psus()): parent_name = 'PSU {}'.format(psu_index + 1) - for thermal_index, thermal in enumerate(psu.get_all_thermals()): - if self.task_stopping_event.is_set(): - return + if psu.get_presence(): + for thermal_index, thermal in enumerate(psu.get_all_thermals()): + if self.task_stopping_event.is_set(): + return - self._refresh_temperature_status(parent_name, thermal, thermal_index) + available_thermals.add((thermal, parent_name, thermal_index)) + self._refresh_temperature_status(parent_name, thermal, thermal_index) for sfp_index, sfp in enumerate(self.chassis.get_all_sfps()): parent_name = 'SFP {}'.format(sfp_index + 1) @@ -607,10 +611,10 @@ class TemperatureUpdater(logger.Logger): if self.task_stopping_event.is_set(): return + available_thermals.add((thermal, parent_name, thermal_index)) self._refresh_temperature_status(parent_name, thermal, thermal_index) if self.is_chassis_system: - available_thermals = set() for module_index, module in enumerate(self.chassis.get_all_modules()): module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1)) @@ -631,20 +635,19 @@ class TemperatureUpdater(logger.Logger): self._refresh_temperature_status(sfp_name, thermal, thermal_index) for psu_index, psu in enumerate(module.get_all_psus()): - psu_name = '{} PSU {}'.format(module_name, psu_index + 1) - for thermal_index, thermal in enumerate(psu.get_all_thermals()): - if self.task_stopping_event.is_set(): - return + if psu.get_presence(): + psu_name = '{} PSU {}'.format(module_name, psu_index + 1) + for thermal_index, thermal in enumerate(psu.get_all_thermals()): + if self.task_stopping_event.is_set(): + return + available_thermals.add((thermal, psu_name, thermal_index)) + self._refresh_temperature_status(psu_name, thermal, thermal_index) + + thermals_to_remove = self.all_thermals - available_thermals + self.all_thermals = available_thermals + for thermal, parent_name, thermal_index in thermals_to_remove: + self._remove_thermal_from_db(thermal, parent_name, thermal_index) - available_thermals.add((thermal, psu_name, thermal_index)) - self._refresh_temperature_status(psu_name, thermal, thermal_index) - - - thermals_to_remove = self.module_thermals - available_thermals - self.module_thermals = available_thermals - for thermal, parent_name, thermal_index in thermals_to_remove: - self._remove_thermal_from_db(thermal, parent_name, thermal_index) - self.log_debug("End temperature updating") def _refresh_temperature_status(self, parent_name, thermal, thermal_index): diff --git a/sonic-thermalctld/tests/mock_platform.py b/sonic-thermalctld/tests/mock_platform.py index 660903226..038347e48 100644 --- a/sonic-thermalctld/tests/mock_platform.py +++ b/sonic-thermalctld/tests/mock_platform.py @@ -175,6 +175,9 @@ def get_serial(self): def get_status(self): return self._status + + def get_powergood_status(self): + return self._status def set_status(self, status): self._status = status diff --git a/sonic-thermalctld/tests/test_thermalctld.py b/sonic-thermalctld/tests/test_thermalctld.py index 3c8d14c89..cd25c5261 100644 --- a/sonic-thermalctld/tests/test_thermalctld.py +++ b/sonic-thermalctld/tests/test_thermalctld.py @@ -529,11 +529,11 @@ def test_update_module_thermals(self): chassis.set_modular_chassis(True) temperature_updater = thermalctld.TemperatureUpdater(chassis, multiprocessing.Event()) temperature_updater.update() - assert len(temperature_updater.module_thermals) == 3 - + assert len(temperature_updater.all_thermals) == 3 + chassis._module_list = [] temperature_updater.update() - assert len(temperature_updater.module_thermals) == 0 + assert len(temperature_updater.all_thermals) == 0 # Modular chassis-related tests