Skip to content

Commit

Permalink
Thermalctld changes for Voltage sensor monitor.
Browse files Browse the repository at this point in the history
  • Loading branch information
bmridul committed Jun 19, 2023
1 parent 4e4f3cb commit 7b363f3
Showing 1 changed file with 237 additions and 0 deletions.
237 changes: 237 additions & 0 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,241 @@ class TemperatureUpdater(logger.Logger):
self.chassis_table._del(name)


class VoltageStatus(logger.Logger):
    """
    Last known reading and over/under-voltage flags of a single voltage sensor.
    """

    def __init__(self):
        super(VoltageStatus, self).__init__(SYSLOG_IDENTIFIER)

        # Sensor name, recorded by set_voltage(); used only in log messages.
        self.name = None
        # Last valid reading, or None while the reading is unavailable.
        self.voltage = None
        self.over_voltage = False
        self.under_voltage = False

    def set_voltage(self, name, voltage):
        """
        Record voltage changes.
        :param name: Name of the voltage sensor.
        :param voltage: New voltage value, or NOT_AVAILABLE.
        :return:
        """
        # Remember the name so the threshold setters can identify the sensor
        # in their log messages.
        self.name = name

        if voltage == NOT_AVAILABLE:
            # Warn only on the transition from a valid reading to unavailable.
            if self.voltage is not None:
                self.log_warning('Voltage of {} became unavailable'.format(name))
                self.voltage = None
            return

        self.voltage = voltage

    def set_over_voltage(self, voltage, threshold):
        """
        Set over voltage status
        :param voltage: voltage
        :param threshold: High threshold
        :return: True if over voltage status changed else False
        """
        if voltage == NOT_AVAILABLE or threshold == NOT_AVAILABLE:
            self.log_warning('Voltage/threshold of {} became unavailable {}/{}'.format(self.name, voltage, threshold))
            # Without both values no comparison is possible: clear the flag.
            old_status = self.over_voltage
            self.over_voltage = False
            return old_status != self.over_voltage

        status = voltage > threshold
        if status == self.over_voltage:
            return False

        self.over_voltage = status
        return True

    def set_under_voltage(self, voltage, threshold):
        """
        Set under voltage status
        :param voltage: voltage
        :param threshold: Low threshold
        :return: True if under voltage status changed else False
        """
        if voltage == NOT_AVAILABLE or threshold == NOT_AVAILABLE:
            # Without both values no comparison is possible: clear the flag.
            old_status = self.under_voltage
            self.under_voltage = False
            return old_status != self.under_voltage

        status = voltage < threshold
        if status == self.under_voltage:
            return False

        self.under_voltage = status
        return True


#
# VoltageUpdater ======================================================================
#
class VoltageUpdater(logger.Logger):
    """
    Poll every voltage sensor of the chassis (and, on a modular chassis, of
    its modules) and publish the readings to the VOLTAGE_INFO table.
    """
    # Voltage information table name in database
    VOLTAGE_INFO_TABLE_NAME = 'VOLTAGE_INFO'

    def __init__(self, chassis, task_stopping_event):
        """
        Initializer of VoltageUpdater
        :param chassis: Object representing a platform chassis
        :param task_stopping_event: Event object; update() returns early once it is set
        """
        super(VoltageUpdater, self).__init__(SYSLOG_IDENTIFIER)

        self.chassis = chassis
        self.task_stopping_event = task_stopping_event
        # Sensor name -> VoltageStatus, kept across update() calls so that
        # only status *changes* are logged.
        self.voltage_status_dict = {}
        state_db = daemon_base.db_connect("STATE_DB")
        self.table = swsscommon.Table(state_db, VoltageUpdater.VOLTAGE_INFO_TABLE_NAME)
        self.chassis_table = None

        self.is_chassis_system = chassis.is_modular_chassis()
        if self.is_chassis_system:
            # (vsensor, module_name, index) tuples seen during the last update().
            self.module_vsensors = set()
            my_slot = try_get(chassis.get_my_slot, INVALID_SLOT)
            if my_slot != INVALID_SLOT:
                try:
                    # Modular chassis does not have to have table CHASSIS_STATE_DB.
                    # So catch the exception here and ignore it.
                    table_name = VoltageUpdater.VOLTAGE_INFO_TABLE_NAME+'_'+str(my_slot)
                    chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB")
                    self.chassis_table = swsscommon.Table(chassis_state_db, table_name)
                except Exception:
                    self.chassis_table = None

    def __del__(self):
        # __del__ also runs when __init__ raised part-way through, so do not
        # assume any attribute exists.
        table = getattr(self, 'table', None)
        if not table:
            return

        chassis_table = None
        if getattr(self, 'is_chassis_system', False):
            chassis_table = getattr(self, 'chassis_table', None)

        for tk in table.getKeys():
            table._del(tk)
            if chassis_table is not None:
                chassis_table._del(tk)

    def _log_on_status_changed(self, normal_status, normal_log, abnormal_log):
        """
        Log when any status changed
        :param normal_status: Expected status.
        :param normal_log: Log string for expected status.
        :param abnormal_log: Log string for unexpected status
        :return:
        """
        if normal_status:
            self.log_notice(normal_log)
        else:
            self.log_warning(abnormal_log)

    def update(self):
        """
        Update all voltage information to database
        :return:
        """
        self.log_debug("Start voltage updating")
        for index, voltage_sensor in enumerate(self.chassis.get_all_vsensors()):
            if self.task_stopping_event.is_set():
                return

            self._refresh_voltage_status(CHASSIS_INFO_KEY, voltage_sensor, index)

        if self.is_chassis_system:
            available_vsensors = set()
            for module_index, module in enumerate(self.chassis.get_all_modules()):
                module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1))

                for vsensor_index, vsensor in enumerate(module.get_all_vsensors()):
                    if self.task_stopping_event.is_set():
                        return

                    available_vsensors.add((vsensor, module_name, vsensor_index))
                    self._refresh_voltage_status(module_name, vsensor, vsensor_index)

            # Sensors seen last time but not now belong to modules that went
            # away; drop their database entries.
            vsensors_to_remove = self.module_vsensors - available_vsensors
            self.module_vsensors = available_vsensors
            for vsensor, parent_name, vsensor_index in vsensors_to_remove:
                self._remove_vsensor_from_db(vsensor, parent_name, vsensor_index)

        self.log_debug("End Voltage updating")

    def _refresh_voltage_status(self, parent_name, vsensor, vsensor_index):
        """
        Get voltage status by platform API and write to database
        :param parent_name: Name of parent device of the vsensor object
        :param vsensor: Object representing a platform voltage vsensor
        :param vsensor_index: Index of the vsensor object in platform chassis
        :return:
        """
        # Bind the fallback name before entering the try block so the except
        # handler can always report which sensor failed.
        name = '{} vsensor {}'.format(parent_name, vsensor_index + 1)
        try:
            name = try_get(vsensor.get_name, name)

            if name not in self.voltage_status_dict:
                self.voltage_status_dict[name] = VoltageStatus()

            voltage_status = self.voltage_status_dict[name]

            high_threshold = NOT_AVAILABLE
            low_threshold = NOT_AVAILABLE
            high_critical_threshold = NOT_AVAILABLE
            low_critical_threshold = NOT_AVAILABLE
            maximum_voltage = NOT_AVAILABLE
            minimum_voltage = NOT_AVAILABLE
            voltage = try_get(vsensor.get_voltage)
            is_replaceable = try_get(vsensor.is_replaceable, False)

            # Always forward the reading, including NOT_AVAILABLE, so the
            # status object can warn when a sensor stops reporting.
            voltage_status.set_voltage(name, voltage)

            if voltage != NOT_AVAILABLE:
                minimum_voltage = try_get(vsensor.get_minimum_recorded)
                maximum_voltage = try_get(vsensor.get_maximum_recorded)
                high_threshold = try_get(vsensor.get_high_threshold)
                low_threshold = try_get(vsensor.get_low_threshold)
                high_critical_threshold = try_get(vsensor.get_high_critical_threshold)
                low_critical_threshold = try_get(vsensor.get_low_critical_threshold)

            # The setters return True only when the flag flipped, so a message
            # is logged once per transition rather than once per poll.
            if voltage != NOT_AVAILABLE and voltage_status.set_over_voltage(voltage, high_threshold):
                self._log_on_status_changed(
                    not voltage_status.over_voltage,
                    'High voltage warning cleared: {} voltage restored to {}V, high threshold {}V'.
                    format(name, voltage, high_threshold),
                    'High voltage warning: {} current voltage {}V, high threshold {}V'.
                    format(name, voltage, high_threshold)
                )

            if voltage != NOT_AVAILABLE and voltage_status.set_under_voltage(voltage, low_threshold):
                self._log_on_status_changed(
                    not voltage_status.under_voltage,
                    'Low voltage warning cleared: {} voltage restored to {}V, low threshold {}V'.
                    format(name, voltage, low_threshold),
                    'Low voltage warning: {} current voltage {}V, low threshold {}V'.
                    format(name, voltage, low_threshold)
                )

            # warning_status reflects the current flags on every poll, not
            # only on the poll in which a flag changed.
            warning = voltage_status.over_voltage or voltage_status.under_voltage

            fvs = swsscommon.FieldValuePairs(
                [('voltage', str(voltage)),
                 ('minimum_voltage', str(minimum_voltage)),
                 ('maximum_voltage', str(maximum_voltage)),
                 ('high_threshold', str(high_threshold)),
                 ('low_threshold', str(low_threshold)),
                 ('warning_status', str(warning)),
                 ('critical_high_threshold', str(high_critical_threshold)),
                 ('critical_low_threshold', str(low_critical_threshold)),
                 ('is_replaceable', str(is_replaceable)),
                 ('timestamp', datetime.now().strftime('%Y%m%d %H:%M:%S'))
                 ])

            self.table.set(name, fvs)
            if self.is_chassis_system and self.chassis_table is not None:
                self.chassis_table.set(name, fvs)
        except Exception as e:
            # Best effort: one misbehaving sensor must not stop the others
            # from being updated.
            self.log_warning('Failed to update vsensor status for {} - {}'.format(name, repr(e)))

    def _remove_vsensor_from_db(self, vsensor, parent_name, vsensor_index):
        """
        Delete the database entries of a voltage sensor
        :param vsensor: Object representing a platform voltage vsensor
        :param parent_name: Name of parent device of the vsensor object
        :param vsensor_index: Index of the vsensor object in its parent device
        :return:
        """
        name = try_get(vsensor.get_name, '{} vsensor {}'.format(parent_name, vsensor_index + 1))
        self.table._del(name)

        if self.chassis_table is not None:
            self.chassis_table._del(name)

class ThermalMonitor(ProcessTaskBase):
# Initial update interval
INITIAL_INTERVAL = 5
Expand Down Expand Up @@ -758,11 +993,13 @@ class ThermalMonitor(ProcessTaskBase):

self.fan_updater = FanUpdater(chassis, self.task_stopping_event)
self.temperature_updater = TemperatureUpdater(chassis, self.task_stopping_event)
self.voltage_updater = VoltageUpdater(chassis, self.task_stopping_event)

def main(self):
begin = time.time()
self.fan_updater.update()
self.temperature_updater.update()
self.voltage_updater.update()
elapsed = time.time() - begin
if elapsed < self.UPDATE_INTERVAL:
self.wait_time = self.UPDATE_INTERVAL - elapsed
Expand Down

0 comments on commit 7b363f3

Please sign in to comment.