Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Smartswitch][DPU] Addition of DPU Chassis for thermalctld #564

Merged
merged 1 commit into from
Nov 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ SYSLOG_IDENTIFIER = 'thermalctld'
NOT_AVAILABLE = 'N/A'
CHASSIS_INFO_KEY = 'chassis 1'
PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO'
INVALID_SLOT = -1
INVALID_SLOT_OR_DPU = -1

ERR_UNKNOWN = 1

Expand Down Expand Up @@ -523,9 +523,11 @@ class TemperatureUpdater(logger.Logger):
self.all_thermals = set()

self.is_chassis_system = chassis.is_modular_chassis()
if self.is_chassis_system:
my_slot = try_get(chassis.get_my_slot, INVALID_SLOT)
if my_slot != INVALID_SLOT:
self.is_smartswitch_dpu = chassis.is_smartswitch() and chassis.is_dpu()
self.is_chassis_upd_required = self.is_chassis_system or self.is_smartswitch_dpu
if self.is_chassis_upd_required:
my_slot = try_get(chassis.get_my_slot if self.is_chassis_system else chassis.get_dpu_id, INVALID_SLOT_OR_DPU)
if my_slot != INVALID_SLOT_OR_DPU:
try:
# Modular chassis does not have to have table CHASSIS_STATE_DB.
# So catch the exception here and ignore it.
Expand All @@ -540,7 +542,7 @@ class TemperatureUpdater(logger.Logger):
table_keys = self.table.getKeys()
for tk in table_keys:
self.table._del(tk)
if self.is_chassis_system and self.chassis_table is not None:
if self.is_chassis_upd_required and self.chassis_table is not None:
self.chassis_table._del(tk)
if self.phy_entity_table:
phy_entity_keys = self.phy_entity_table.getKeys()
Expand Down Expand Up @@ -593,6 +595,7 @@ class TemperatureUpdater(logger.Logger):
available_thermals.add((thermal, parent_name, thermal_index))
self._refresh_temperature_status(parent_name, thermal, thermal_index)

# A DPU chassis has no modules, so this check intentionally stays limited to modular chassis and is not extended to the DPU chassis
if self.is_chassis_system:
for module_index, module in enumerate(self.chassis.get_all_modules()):
module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1))
Expand Down Expand Up @@ -702,7 +705,7 @@ class TemperatureUpdater(logger.Logger):
])

self.table.set(name, fvs)
if self.is_chassis_system and self.chassis_table is not None:
if self.is_chassis_upd_required and self.chassis_table is not None:
self.chassis_table.set(name, fvs)
except Exception as e:
self.log_warning('Failed to update thermal status for {} - {}'.format(name, repr(e)))
Expand Down
23 changes: 23 additions & 0 deletions sonic-thermalctld/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,10 @@ def __init__(self):
self._replaceable = False

self._is_chassis_system = False
self._is_dpu = False
self._is_smartswitch = False
self._my_slot = module_base.ModuleBase.MODULE_INVALID_SLOT
self._dpu_id = None
self._thermal_manager = MockThermalManager()

def make_absent_fan(self):
Expand Down Expand Up @@ -445,6 +448,26 @@ def get_position_in_parent(self):
def is_replaceable(self):
return self._replaceable

def is_dpu(self):
    """Return the stored ``_is_dpu`` flag of this mock chassis."""
    return self._is_dpu

def is_smartswitch(self):
    """Return the stored ``_is_smartswitch`` flag of this mock chassis."""
    return self._is_smartswitch

def set_smartswitch(self, is_true):
    """Store ``is_true`` as the value later returned by ``is_smartswitch``."""
    self._is_smartswitch = is_true

def set_dpu(self, is_true):
    """Store ``is_true`` as the value later returned by ``is_dpu``."""
    self._is_dpu = is_true

def set_dpu_id(self, dpu_id):
    """Store ``dpu_id`` as the id later returned by ``get_dpu_id``."""
    self._dpu_id = dpu_id

def get_dpu_id(self):
    """Return the DPU id previously stored with ``set_dpu_id``.

    Raises:
        NotImplementedError: if no id has been stored yet (``_dpu_id`` is
            still ``None``). This is the mock's default behaviour.
    """
    # Compare against None instead of relying on truthiness: an id of 0 is
    # falsy but is still a configured value and must be returned, not
    # reported as "not implemented".
    if self._dpu_id is None:
        raise NotImplementedError
    return self._dpu_id

class MockModule(module_base.ModuleBase):
def __init__(self):
Expand Down
68 changes: 68 additions & 0 deletions sonic-thermalctld/tests/test_thermalctld.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,74 @@ def test_update_module_thermals(self):
assert len(temperature_updater.all_thermals) == 0


# DPU chassis-related tests
def test_dpu_chassis_thermals():
    """Walk one MockChassis through modular and DPU setups, checking chassis_table after each."""
    mock_chassis = MockChassis()

    def build_updater():
        # Every scenario below builds a fresh updater from the chassis' current state.
        return thermalctld.TemperatureUpdater(mock_chassis, multiprocessing.Event())

    # Modular chassis (not a DPU chassis): TemperatureUpdater behaviour is unchanged.
    mock_chassis.set_modular_chassis(True)
    mock_chassis.set_my_slot(1)
    updater = build_updater()
    assert updater.chassis_table

    # DPU chassis while is_smartswitch() still returns False: no CHASSIS_STATE_DB table.
    mock_chassis.set_modular_chassis(False)
    mock_chassis.set_dpu(True)
    updater = build_updater()
    assert not updater.chassis_table

    # Smartswitch DPU chassis without a get_dpu_id implementation: still no CHASSIS_STATE_DB table.
    mock_chassis.set_smartswitch(True)
    updater = build_updater()
    assert not updater.chassis_table

    # Smartswitch DPU chassis with get_dpu_id implemented: data goes to CHASSIS_STATE_DB.
    dpu_id = 1
    mock_chassis.set_dpu_id(dpu_id)
    updater = build_updater()
    assert updater.chassis_table
    # The chassis state db table name carries the DPU id as its suffix.
    assert updater.chassis_table.table_name == f"{TEMPER_INFO_TABLE_NAME}_{dpu_id}"

    state_table = Table("STATE_DB", "xtable")
    updater.table = state_table
    updater.table._del = mock.MagicMock()


def test_dpu_chassis_state_deinit():
    """Verify that __del__ removes every state-table key from the DPU chassis_table as well."""
    dpu_chassis = MockChassis()
    dpu_chassis.set_smartswitch(True)
    dpu_chassis.set_modular_chassis(False)
    dpu_chassis.set_dpu(True)
    dpu_chassis.set_dpu_id(1)

    updater = thermalctld.TemperatureUpdater(dpu_chassis, multiprocessing.Event())
    assert updater.chassis_table

    # Swap in tables whose key listing and deletion are mocked so the
    # deletions performed by __del__ can be counted.
    state_table = Table("STATE_DB", "xtable")
    state_table.getKeys = mock.MagicMock(return_value=['key1', 'key2'])
    state_table._del = mock.MagicMock()
    chassis_state_table = Table("CHASSIS_STATE_DB", "ctable")
    chassis_state_table._del = mock.MagicMock()

    updater.table = state_table
    updater.phy_entity_table = None
    updater.chassis_table = chassis_state_table

    updater.__del__()

    # One chassis_table deletion is expected per key reported by the state table.
    assert updater.chassis_table._del.call_count == 2
    updater.chassis_table._del.assert_has_calls(
        [mock.call('key1'), mock.call('key2')],
        any_order=True,
    )


def test_updater_dpu_thermal_check_chassis_table():
    """Check that chassis_table size tracks the chassis thermal count on a smartswitch DPU."""
    dpu_chassis = MockChassis()

    # Start with a single thermal on the chassis.
    dpu_chassis.get_all_thermals().append(MockThermal())

    dpu_chassis.set_dpu(True)
    dpu_chassis.set_smartswitch(True)
    dpu_chassis.set_dpu_id(1)
    updater = thermalctld.TemperatureUpdater(dpu_chassis, multiprocessing.Event())

    updater.update()
    assert updater.chassis_table.get_size() == dpu_chassis.get_num_thermals()

    # Adding a second thermal must be reflected after the next update.
    dpu_chassis.get_all_thermals().append(MockThermal())
    updater.update()
    assert updater.chassis_table.get_size() == dpu_chassis.get_num_thermals()


# Modular chassis-related tests


Expand Down
Loading