diff --git a/.github/codeql/codeql-config.yml b/.github/codeql/codeql-config.yml new file mode 100644 index 000000000..2c8b0498f --- /dev/null +++ b/.github/codeql/codeql-config.yml @@ -0,0 +1,4 @@ +name: "CodeQL config" +queries: + - uses: security-and-quality + - uses: security-extended diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml new file mode 100644 index 000000000..6478fb99f --- /dev/null +++ b/.github/workflows/codeql-analysis.yml @@ -0,0 +1,43 @@ +# For more infomation, please visit: https://github.com/github/codeql-action + +name: "CodeQL" + +on: + push: + branches: + - 'master' + - '202[0-9][0-9][0-9]' + pull_request_target: + branches: + - 'master' + - '202[0-9][0-9][0-9]' + +jobs: + analyze: + name: Analyze + runs-on: ubuntu-latest + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + config-file: ./.github/codeql/codeql-config.yml + languages: ${{ matrix.language }} + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 5fb864470..d465c0209 100644 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -51,6 +51,7 @@ CHASSIS_MODULE_INFO_NUM_ASICS_FIELD = 'num_asics' CHASSIS_MODULE_INFO_ASICS = 'asics' CHASSIS_ASIC_INFO_TABLE = 'CHASSIS_ASIC_TABLE' +CHASSIS_FABRIC_ASIC_INFO_TABLE = 'CHASSIS_FABRIC_ASIC_TABLE' CHASSIS_ASIC = 'asic' CHASSIS_ASIC_PCI_ADDRESS_FIELD = 'asic_pci_address' CHASSIS_ASIC_ID_IN_MODULE_FIELD = 'asic_id_in_module' @@ -158,7 +159,7 @@ class ModuleConfigUpdater(logger.Logger): class ModuleUpdater(logger.Logger): - def __init__(self, log_identifier, chassis): + def __init__(self, log_identifier, chassis, my_slot, supervisor_slot): """ Constructor for ModuleUpdater :param chassis: Object representing a platform chassis @@ -166,6 +167,8 @@ class ModuleUpdater(logger.Logger): super(ModuleUpdater, self).__init__(log_identifier) self.chassis = chassis + self.my_slot = my_slot + self.supervisor_slot = supervisor_slot self.num_modules = chassis.get_num_modules() # Connect to STATE_DB and create chassis info tables state_db = daemon_base.db_connect("STATE_DB") @@ -177,9 +180,14 @@ class ModuleUpdater(logger.Logger): CHASSIS_MODULE_INFO_SLOT_FIELD, CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] - chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") - self.asic_table = swsscommon.Table(chassis_state_db, CHASSIS_ASIC_INFO_TABLE) - + self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + if self._is_supervisor(): + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_FABRIC_ASIC_INFO_TABLE) + else: + self.asic_table = swsscommon.Table(self.chassis_state_db, + CHASSIS_ASIC_INFO_TABLE) +# self.midplane_initialized = try_get(chassis.init_midplane_switch, default=False) if not self.midplane_initialized: self.log_error("Chassisd midplane intialization failed") @@ -240,25 +248,28 @@ class ModuleUpdater(logger.Logger): self.module_table.set(key, fvs) if module_info_dict[CHASSIS_MODULE_INFO_OPERSTATUS_FIELD] != str(ModuleBase.MODULE_STATUS_ONLINE): - notOnlineModules.append(key) - continue + notOnlineModules.append(key) + continue for asic_id, asic in enumerate(module_info_dict[CHASSIS_MODULE_INFO_ASICS]): - asic_global_id, asic_pci_addr = asic - asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id) - asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr), - (CHASSIS_MODULE_INFO_NAME_FIELD, key), - (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) - self.asic_table.set(asic_key, asic_fvs) + asic_global_id, asic_pci_addr = asic + asic_key = "%s%s" % (CHASSIS_ASIC, asic_global_id) + if not self._is_supervisor(): + asic_key = "%s|%s" % (key, asic_key) + + asic_fvs = swsscommon.FieldValuePairs([(CHASSIS_ASIC_PCI_ADDRESS_FIELD, asic_pci_addr), + (CHASSIS_MODULE_INFO_NAME_FIELD, key), + (CHASSIS_ASIC_ID_IN_MODULE_FIELD, str(asic_id))]) + self.asic_table.set(asic_key, asic_fvs) # Asics that are on the "not online" modules need to be cleaned up asics = list(self.asic_table.getKeys()) for asic in asics: - fvs = self.asic_table.get(asic) - if isinstance(fvs, list): - fvs = dict(fvs[-1]) - if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: - self.asic_table._del(asic) + fvs = self.asic_table.get(asic) + if isinstance(fvs, list): + fvs = dict(fvs[-1]) + if fvs[CHASSIS_MODULE_INFO_NAME_FIELD] in notOnlineModules: + self.asic_table._del(asic) def _get_module_info(self, module_index): """ @@ -403,15 +414,17 @@ class ChassisdDaemon(daemon_base.DaemonBase): self.log_error("Failed to load chassis due to {}".format(repr(e))) sys.exit(CHASSIS_LOAD_ERROR) - # Check if module list is populated - self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis) - self.module_updater.modules_num_update() - # Check for valid slot numbers - self.module_updater.my_slot = try_get(platform_chassis.get_my_slot, + my_slot = try_get(platform_chassis.get_my_slot, default=INVALID_SLOT) - self.module_updater.supervisor_slot = try_get(platform_chassis.get_supervisor_slot, + supervisor_slot = try_get(platform_chassis.get_supervisor_slot, default=INVALID_SLOT) + + # Check if module list is populated + self.module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, platform_chassis, my_slot, supervisor_slot) + self.module_updater.modules_num_update() + + if ((self.module_updater.my_slot == INVALID_SLOT) or (self.module_updater.supervisor_slot == INVALID_SLOT)): self.log_error("Chassisd not supported for this platform") diff --git a/sonic-chassisd/tests/test_chassisd.py b/sonic-chassisd/tests/test_chassisd.py index f3f36c3c3..484fe34a4 100644 --- a/sonic-chassisd/tests/test_chassisd.py +++ b/sonic-chassisd/tests/test_chassisd.py @@ -59,7 +59,8 @@ def test_moduleupdater_check_valid_fields(): chassis.module_list.append(module) - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) assert desc == fvs[CHASSIS_MODULE_INFO_DESC_FIELD] @@ -82,7 +83,8 @@ def test_moduleupdater_check_invalid_name(): chassis.module_list.append(module) - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) assert fvs == None @@ -102,7 +104,8 @@ def test_moduleupdater_check_status_update(): module.set_oper_status(status) chassis.module_list.append(module) - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.module_db_update() fvs = module_updater.module_table.get(name) print('Initial DB-entry {}'.format(fvs)) @@ -136,7 +139,8 @@ def test_moduleupdater_check_deinit(): module.set_oper_status(status) chassis.module_list.append(module) - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.modules_num_update() module_updater.module_db_update() fvs = module_updater.module_table.get(name) @@ -226,7 +230,8 @@ def test_configupdater_check_num_modules(): module = MockModule(index, name, desc, module_type, slot) # No modules - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.modules_num_update() fvs = module_updater.chassis_table.get(CHASSIS_INFO_KEY_TEMPLATE.format(1)) assert fvs == None @@ -274,7 +279,8 @@ def test_midplane_presence_modules(): chassis.module_list.append(fabric) #Run on supervisor - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.supervisor_slot = supervisor.get_slot() module_updater.my_slot = supervisor.get_slot() module_updater.modules_num_update() @@ -338,7 +344,8 @@ def test_midplane_presence_supervisor(): chassis.module_list.append(fabric) #Run on supervisor - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, slot, + module.supervisor_slot) module_updater.supervisor_slot = supervisor.get_slot() module_updater.my_slot = module.get_slot() module_updater.modules_num_update() @@ -403,9 +410,9 @@ def test_asic_presence(): chassis.module_list.append(fabric) #Run on supervisor - module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis) - module_updater.supervisor_slot = supervisor.get_slot() - module_updater.my_slot = supervisor.get_slot() + module_updater = ModuleUpdater(SYSLOG_IDENTIFIER, chassis, + module.supervisor_slot, + module.supervisor_slot) module_updater.modules_num_update() module_updater.module_db_update() module_updater.check_midplane_reachability() diff --git a/sonic-psud/scripts/psud b/sonic-psud/scripts/psud index a8c529c62..96ba57935 100644 --- a/sonic-psud/scripts/psud +++ b/sonic-psud/scripts/psud @@ -57,6 +57,9 @@ PSU_INFO_VOLTAGE_MAX_TH_FIELD = 'voltage_max_threshold' PSU_INFO_VOLTAGE_MIN_TH_FIELD = 'voltage_min_threshold' PSU_INFO_CURRENT_FIELD = 'current' PSU_INFO_POWER_FIELD = 'power' +PSU_INFO_POWER_OVERLOAD = 'power_overload' +PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD = 'power_warning_suppress_threshold' +PSU_INFO_POWER_CRITICAL_THRESHOLD = 'power_critical_threshold' PSU_INFO_FRU_FIELD = 'is_replaceable' PSU_INFO_IN_VOLTAGE_FIELD = 'input_voltage' PSU_INFO_IN_CURRENT_FIELD = 'input_current' @@ -283,6 +286,8 @@ class PsuStatus(object): self.power_good = True self.voltage_good = True self.temperature_good = True + self.check_psu_power_threshold = False + self.power_exceeded_threshold = False self.logger = logger def set_presence(self, presence): @@ -339,6 +344,13 @@ class PsuStatus(object): self.temperature_good = temperature_good return True + def set_power_exceed_threshold(self, power_exceeded_threshold): + if power_exceeded_threshold == self.power_exceeded_threshold: + return False + + self.power_exceeded_threshold = power_exceeded_threshold + return True + def is_ok(self): return self.presence and self.power_good and self.voltage_good and self.temperature_good @@ -486,6 +498,8 @@ class DaemonPsud(daemon_base.DaemonBase): 'PSU absence warning cleared: {} is inserted back.'.format(name), 'PSU absence warning: {} is not present.'.format(name) ) + if not psu_status.presence: + psu_status.check_psu_power_threshold = False if presence_changed or self.first_run: # Have to update PSU fan data here because PSU presence status changed. If we don't @@ -495,13 +509,46 @@ class DaemonPsud(daemon_base.DaemonBase): # every 60 seconds, it may still treat PSU state to "OK" and PSU LED to "red". self._update_psu_fan_data(psu, index) - if presence and psu_status.set_power_good(power_good): + power_good_changed = psu_status.set_power_good(power_good) + if presence and power_good_changed: set_led = True log_on_status_changed(self, psu_status.power_good, 'Power absence warning cleared: {} power is back to normal.'.format(name), 'Power absence warning: {} is out of power.'.format(name) ) + if presence and power_good_changed or self.first_run: + psu_status.check_psu_power_threshold = False + if psu_status.power_good: + # power_good has been updated and it is True, which means it was False + # Initialize power exceeding threshold state in this case + if (try_get(psu.get_psu_power_critical_threshold) and try_get(psu.get_psu_power_warning_suppress_threshold) and power != NOT_AVAILABLE): + psu_status.check_psu_power_threshold = True + + power_exceeded_threshold = psu_status.power_exceeded_threshold + power_warning_suppress_threshold = try_get(psu.get_psu_power_warning_suppress_threshold, NOT_AVAILABLE) + power_critical_threshold = try_get(psu.get_psu_power_critical_threshold, NOT_AVAILABLE) + if psu_status.check_psu_power_threshold: + if power_warning_suppress_threshold == NOT_AVAILABLE or power_critical_threshold == NOT_AVAILABLE: + self.log_error("PSU power thresholds become invalid: threshold {} critical threshold {}".format(power_warning_suppress_threshold, power_critical_threshold)) + psu_status.check_psu_power_threshold = False + psu_status.power_exceeded_threshold = False + elif psu_status.power_exceeded_threshold: + # The failing threshold is the warning threshold + if power < power_warning_suppress_threshold: + # Clear alarm + power_exceeded_threshold = False + else: + # The rising threshold is the critical threshold + if power >= power_critical_threshold: + # Raise alarm + power_exceeded_threshold = True + + if psu_status.set_power_exceed_threshold(power_exceeded_threshold): + log_on_status_changed(self, not psu_status.power_exceeded_threshold, + 'PSU power warning cleared: {} power {} is back to normal.'.format(name, power), + 'PSU power warning: {} power {} exceeds critical threshold {}.'.format(name, power, power_critical_threshold)) + if presence and psu_status.set_voltage(voltage, voltage_high_threshold, voltage_low_threshold): set_led = True log_on_status_changed(self, psu_status.voltage_good, @@ -532,6 +579,9 @@ class DaemonPsud(daemon_base.DaemonBase): (PSU_INFO_VOLTAGE_MAX_TH_FIELD, str(voltage_high_threshold)), (PSU_INFO_CURRENT_FIELD, str(current)), (PSU_INFO_POWER_FIELD, str(power)), + (PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD, str(power_warning_suppress_threshold)), + (PSU_INFO_POWER_CRITICAL_THRESHOLD, str(power_critical_threshold)), + (PSU_INFO_POWER_OVERLOAD, str(power_exceeded_threshold)), (PSU_INFO_FRU_FIELD, str(is_replaceable)), (PSU_INFO_IN_CURRENT_FIELD, str(in_current)), (PSU_INFO_IN_VOLTAGE_FIELD, str(in_voltage)), diff --git a/sonic-psud/tests/mock_platform.py b/sonic-psud/tests/mock_platform.py index 2294533d6..5db3e394f 100644 --- a/sonic-psud/tests/mock_platform.py +++ b/sonic-psud/tests/mock_platform.py @@ -356,6 +356,12 @@ def set_status_led(self, color): self._status_led_color = color return True + def get_psu_power_critical_threshold(self): + raise NotImplementedError + + def get_psu_power_warning_suppress_threshold(self): + raise NotImplementedError + # Methods inherited from DeviceBase class and related setters def get_name(self): return self._name diff --git a/sonic-psud/tests/test_DaemonPsud.py b/sonic-psud/tests/test_DaemonPsud.py index f86a91231..482eb1cdd 100644 --- a/sonic-psud/tests/test_DaemonPsud.py +++ b/sonic-psud/tests/test_DaemonPsud.py @@ -143,16 +143,7 @@ def test_update_psu_data(self): expected_calls = [mock.call("Failed to update PSU data - Test message")] * 2 assert daemon_psud.log_warning.mock_calls == expected_calls - @mock.patch('psud._wrapper_get_psu_presence', mock.MagicMock()) - @mock.patch('psud._wrapper_get_psu_status', mock.MagicMock()) - def test_update_single_psu_data(self): - psud._wrapper_get_psu_presence.return_value = True - psud._wrapper_get_psu_status.return_value = True - - psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234') - psud.platform_chassis = MockChassis() - psud.platform_chassis._psu_list.append(psu1) - + def _construct_expected_fvp(self, power=100.0, power_warning_suppress_threshold='N/A', power_critical_threshold='N/A', power_overload=False): expected_fvp = psud.swsscommon.FieldValuePairs( [(psud.PSU_INFO_MODEL_FIELD, 'Fake Model'), (psud.PSU_INFO_SERIAL_FIELD, '12345678'), @@ -163,17 +154,171 @@ def test_update_single_psu_data(self): (psud.PSU_INFO_VOLTAGE_MIN_TH_FIELD, '11.0'), (psud.PSU_INFO_VOLTAGE_MAX_TH_FIELD, '13.0'), (psud.PSU_INFO_CURRENT_FIELD, '8.0'), - (psud.PSU_INFO_POWER_FIELD, '100.0'), + (psud.PSU_INFO_POWER_FIELD, str(power)), + (psud.PSU_INFO_POWER_WARNING_SUPPRESS_THRESHOLD, str(power_warning_suppress_threshold)), + (psud.PSU_INFO_POWER_CRITICAL_THRESHOLD, str(power_critical_threshold)), + (psud.PSU_INFO_POWER_OVERLOAD, str(power_overload)), (psud.PSU_INFO_FRU_FIELD, 'True'), (psud.PSU_INFO_IN_VOLTAGE_FIELD, '220.25'), (psud.PSU_INFO_IN_CURRENT_FIELD, '0.72'), (psud.PSU_INFO_POWER_MAX_FIELD, 'N/A'), ]) + return expected_fvp + + @mock.patch('psud._wrapper_get_psu_presence', mock.MagicMock()) + @mock.patch('psud._wrapper_get_psu_status', mock.MagicMock()) + def test_update_single_psu_data(self): + psud._wrapper_get_psu_presence.return_value = True + psud._wrapper_get_psu_status.return_value = True + + psu1 = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234') + psud.platform_chassis = MockChassis() + psud.platform_chassis._psu_list.append(psu1) + + expected_fvp = self._construct_expected_fvp() daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER) daemon_psud.psu_tbl = mock.MagicMock() daemon_psud._update_single_psu_data(1, psu1) daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold + + @mock.patch('psud.daemon_base.db_connect', mock.MagicMock()) + def test_power_threshold(self): + psu = MockPsu('PSU 1', 0, True, 'Fake Model', '12345678', '1234') + psud.platform_chassis = MockChassis() + psud.platform_chassis._psu_list.append(psu) + + daemon_psud = psud.DaemonPsud(SYSLOG_IDENTIFIER) + + daemon_psud.psu_tbl = mock.MagicMock() + psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0) + psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(return_value=110.0) + + # Normal start. All good and all thresholds are supported + # Power is in normal range (below warning threshold) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(100.0, 110.0, 120.0, False) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + daemon_psud.first_run = False + + # Power is increasing across the warning threshold + # Normal => (warning, critical) + psu.set_power(115.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, False) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # Power is increasing across the critical threshold. Alarm raised + # (warning, critical) => (critical, ) + psu.set_power(125.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # Power is decreasing across the critical threshold. Alarm not cleared + # (critical, ) => (warning, critical) + psu.set_power(115.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(115.0, 110.0, 120.0, True) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # Power is decreasing across the warning threshold. Alarm cleared + # (warning, critical) => Normal + psu.set_power(105.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + daemon_psud._update_led_color() + + # Power is increasing across the critical threshold. Alarm raised + # Normal => (critical, ) + psu.set_power(125.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(125.0, 110.0, 120.0, True) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # Power is increasing across the critical threshold. Alarm raised + # (critical, ) => Normal + psu.set_power(105.0) + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + expected_fvp = self._construct_expected_fvp(105.0, 110.0, 120.0, False) + daemon_psud.psu_tbl.set.assert_called_with(psud.PSU_INFO_KEY_TEMPLATE.format(1), expected_fvp) + daemon_psud._update_led_color() + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # PSU power becomes down + psu.set_status(False) + daemon_psud._update_single_psu_data(1, psu) + daemon_psud._update_led_color() + assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + assert psu.STATUS_LED_COLOR_RED == psu.get_status_led() + + # PSU power becomes up + psu.set_status(True) + daemon_psud._update_single_psu_data(1, psu) + daemon_psud._update_led_color() + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # PSU becomes absent + psu.set_presence(False) + daemon_psud._update_single_psu_data(1, psu) + daemon_psud._update_led_color() + assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + assert psu.STATUS_LED_COLOR_RED == psu.get_status_led() + + # PSU becomes present + psu.set_presence(True) + daemon_psud._update_single_psu_data(1, psu) + daemon_psud._update_led_color() + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + assert psu.STATUS_LED_COLOR_GREEN == psu.get_status_led() + + # Thresholds become invalid on the fly + psu.get_psu_power_critical_threshold = mock.MagicMock(side_effect=NotImplementedError('')) + daemon_psud._update_single_psu_data(1, psu) + assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + psu.get_psu_power_critical_threshold = mock.MagicMock(return_value=120.0) + daemon_psud.psu_status_dict[1].check_psu_power_threshold = True + daemon_psud._update_single_psu_data(1, psu) + assert daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold + psu.get_psu_power_warning_suppress_threshold = mock.MagicMock(side_effect=NotImplementedError('')) + daemon_psud._update_single_psu_data(1, psu) + assert not daemon_psud.psu_status_dict[1].check_psu_power_threshold + assert not daemon_psud.psu_status_dict[1].power_exceeded_threshold def test_set_psu_led(self): mock_logger = mock.MagicMock() diff --git a/sonic-xcvrd/tests/test_xcvrd.py b/sonic-xcvrd/tests/test_xcvrd.py index e76bbf875..0e57407cb 100644 --- a/sonic-xcvrd/tests/test_xcvrd.py +++ b/sonic-xcvrd/tests/test_xcvrd.py @@ -80,6 +80,15 @@ def test_post_port_dom_info_to_db(self, mock_get_sfp_type): mock_get_sfp_type.return_value = 'QSFP_DD' post_port_dom_info_to_db(logical_port_name, port_mapping, dom_tbl, stop_event) + def test_post_port_dom_threshold_info_to_db(self, mock_get_sfp_type): + logical_port_name = "Ethernet0" + port_mapping = PortMapping() + stop_event = threading.Event() + dom_threshold_tbl = Table("STATE_DB", TRANSCEIVER_DOM_THRESHOLD_TABLE) + post_port_dom_info_to_db(logical_port_name, port_mapping, dom_threshold_tbl, stop_event) + mock_get_sfp_type.return_value = 'QSFP_DD' + post_port_dom_info_to_db(logical_port_name, port_mapping, dom_threshold_tbl, stop_event) + @patch('xcvrd.xcvrd_utilities.port_mapping.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True)) @patch('xcvrd.xcvrd._wrapper_get_transceiver_pm', MagicMock(return_value={'prefec_ber_avg': '0.0003407240007014899', @@ -103,9 +112,10 @@ def test_del_port_sfp_dom_info_from_db(self): logical_port_name = "Ethernet0" port_mapping = PortMapping() dom_tbl = Table("STATE_DB", TRANSCEIVER_DOM_SENSOR_TABLE) + dom_threshold_tbl = Table("STATE_DB", TRANSCEIVER_DOM_THRESHOLD_TABLE) init_tbl = Table("STATE_DB", TRANSCEIVER_INFO_TABLE) pm_tbl = Table("STATE_DB", TRANSCEIVER_PM_TABLE) - del_port_sfp_dom_info_from_db(logical_port_name, port_mapping, init_tbl, dom_tbl, pm_tbl) + del_port_sfp_dom_info_from_db(logical_port_name, port_mapping, init_tbl, dom_tbl, dom_threshold_tbl, pm_tbl) @patch('xcvrd.xcvrd.get_physical_port_name_dict', MagicMock(return_value={0: 'Ethernet0'})) @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True)) @@ -169,8 +179,8 @@ def test_post_port_dom_threshold_info_to_db(self): logical_port_name = "Ethernet0" port_mapping = PortMapping() stop_event = threading.Event() - dom_tbl = Table("STATE_DB", TRANSCEIVER_DOM_SENSOR_TABLE) - post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, dom_tbl, stop_event) + dom_threshold_tbl = Table("STATE_DB", TRANSCEIVER_DOM_THRESHOLD_TABLE) + post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, dom_threshold_tbl, stop_event) @patch('xcvrd.xcvrd_utilities.port_mapping.PortMapping.logical_port_name_to_physical_port_list', MagicMock(return_value=[0])) @patch('xcvrd.xcvrd._wrapper_get_presence', MagicMock(return_value=True)) @@ -775,6 +785,7 @@ def test_SfpStateUpdateTask_handle_port_change_event(self, mock_update_status_hw mock_table_helper.get_status_tbl = MagicMock(return_value=mock_table) mock_table_helper.get_int_tbl = MagicMock(return_value=mock_table) mock_table_helper.get_dom_tbl = MagicMock(return_value=mock_table) + mock_table_helper.get_dom_threshold_tbl = MagicMock(return_value=mock_table) stopping_event = multiprocessing.Event() port_mapping = PortMapping() retry_eeprom_set = set() @@ -831,6 +842,7 @@ def test_SfpStateUpdateTask_retry_eeprom_reading(self, mock_update_status_hw, mo task.xcvr_table_helper = XcvrTableHelper(DEFAULT_NAMESPACE) task.xcvr_table_helper.get_intf_tbl = MagicMock(return_value=mock_table) task.xcvr_table_helper.get_dom_tbl = MagicMock(return_value=mock_table) + task.xcvr_table_helper.get_dom_threshold_tbl = MagicMock(return_value=mock_table) task.xcvr_table_helper.get_app_port_tbl = MagicMock(return_value=mock_table) task.xcvr_table_helper.get_status_tbl = MagicMock(return_value=mock_table) task.xcvr_table_helper.get_pm_tbl = MagicMock(return_value=mock_table) @@ -1011,9 +1023,13 @@ class MockTable: dom_tbl = MockTable() dom_tbl.get = MagicMock(return_value=(True, (('key3', 'value3'),))) dom_tbl.set = MagicMock() + dom_threshold_tbl = MockTable() + dom_threshold_tbl.get = MagicMock(return_value=(True, (('key4', 'value4'),))) + dom_threshold_tbl.set = MagicMock() mock_table_helper.get_status_tbl = MagicMock(return_value=status_tbl) mock_table_helper.get_intf_tbl = MagicMock(return_value=int_tbl) mock_table_helper.get_dom_tbl = MagicMock(return_value=dom_tbl) + mock_table_helper.get_dom_threshold_tbl = MagicMock(return_value=dom_threshold_tbl) port_mapping = PortMapping() retry_eeprom_set = set() @@ -1022,6 +1038,7 @@ class MockTable: task.xcvr_table_helper.get_status_tbl = mock_table_helper.get_status_tbl task.xcvr_table_helper.get_intf_tbl = mock_table_helper.get_intf_tbl task.xcvr_table_helper.get_dom_tbl = mock_table_helper.get_dom_tbl + task.xcvr_table_helper.get_dom_threshold_tbl = mock_table_helper.get_dom_threshold_tbl port_change_event = PortChangeEvent('Ethernet0', 1, 0, PortChangeEvent.PORT_ADD) task.port_mapping.handle_port_change_event(port_change_event) # SFP information is in the DB, copy the SFP information for the newly added logical port @@ -1032,6 +1049,8 @@ class MockTable: int_tbl.set.assert_called_with('Ethernet0', (('key2', 'value2'),)) dom_tbl.get.assert_called_with('Ethernet0') dom_tbl.set.assert_called_with('Ethernet0', (('key3', 'value3'),)) + dom_threshold_tbl.get.assert_called_with('Ethernet0') + dom_threshold_tbl.set.assert_called_with('Ethernet0', (('key4', 'value4'),)) status_tbl.get.return_value = (False, ()) mock_get_presence.return_value = True @@ -1060,7 +1079,7 @@ class MockTable: assert mock_post_dom_info.call_count == 1 mock_post_dom_info.assert_called_with('Ethernet0', task.port_mapping, dom_tbl) assert mock_post_dom_th.call_count == 1 - mock_post_dom_th.assert_called_with('Ethernet0', task.port_mapping, dom_tbl) + mock_post_dom_th.assert_called_with('Ethernet0', task.port_mapping, dom_threshold_tbl) assert mock_update_media_setting.call_count == 1 assert 'Ethernet0' not in task.retry_eeprom_set diff --git a/sonic-xcvrd/xcvrd/xcvrd.py b/sonic-xcvrd/xcvrd/xcvrd.py index 7a755cce2..8d021839e 100644 --- a/sonic-xcvrd/xcvrd/xcvrd.py +++ b/sonic-xcvrd/xcvrd/xcvrd.py @@ -8,7 +8,6 @@ try: import ast import copy - import functools import json import multiprocessing import os @@ -41,6 +40,7 @@ TRANSCEIVER_INFO_TABLE = 'TRANSCEIVER_INFO' TRANSCEIVER_DOM_SENSOR_TABLE = 'TRANSCEIVER_DOM_SENSOR' +TRANSCEIVER_DOM_THRESHOLD_TABLE = 'TRANSCEIVER_DOM_THRESHOLD' TRANSCEIVER_STATUS_TABLE = 'TRANSCEIVER_STATUS' TRANSCEIVER_PM_TABLE = 'TRANSCEIVER_PM' @@ -277,30 +277,18 @@ def beautify_dom_info_dict(dom_info_dict, physical_port): def beautify_dom_threshold_info_dict(dom_info_dict): - dom_info_dict['temphighalarm'] = strip_unit_and_beautify(dom_info_dict['temphighalarm'], TEMP_UNIT) - dom_info_dict['temphighwarning'] = strip_unit_and_beautify(dom_info_dict['temphighwarning'], TEMP_UNIT) - dom_info_dict['templowalarm'] = strip_unit_and_beautify(dom_info_dict['templowalarm'], TEMP_UNIT) - dom_info_dict['templowwarning'] = strip_unit_and_beautify(dom_info_dict['templowwarning'], TEMP_UNIT) - - dom_info_dict['vcchighalarm'] = strip_unit_and_beautify(dom_info_dict['vcchighalarm'], VOLT_UNIT) - dom_info_dict['vcchighwarning'] = strip_unit_and_beautify(dom_info_dict['vcchighwarning'], VOLT_UNIT) - dom_info_dict['vcclowalarm'] = strip_unit_and_beautify(dom_info_dict['vcclowalarm'], VOLT_UNIT) - dom_info_dict['vcclowwarning'] = strip_unit_and_beautify(dom_info_dict['vcclowwarning'], VOLT_UNIT) - - dom_info_dict['txpowerhighalarm'] = strip_unit_and_beautify(dom_info_dict['txpowerhighalarm'], POWER_UNIT) - dom_info_dict['txpowerlowalarm'] = strip_unit_and_beautify(dom_info_dict['txpowerlowalarm'], POWER_UNIT) - dom_info_dict['txpowerhighwarning'] = strip_unit_and_beautify(dom_info_dict['txpowerhighwarning'], POWER_UNIT) - dom_info_dict['txpowerlowwarning'] = strip_unit_and_beautify(dom_info_dict['txpowerlowwarning'], POWER_UNIT) - - dom_info_dict['rxpowerhighalarm'] = strip_unit_and_beautify(dom_info_dict['rxpowerhighalarm'], POWER_UNIT) - dom_info_dict['rxpowerlowalarm'] = strip_unit_and_beautify(dom_info_dict['rxpowerlowalarm'], POWER_UNIT) - dom_info_dict['rxpowerhighwarning'] = strip_unit_and_beautify(dom_info_dict['rxpowerhighwarning'], POWER_UNIT) - dom_info_dict['rxpowerlowwarning'] = strip_unit_and_beautify(dom_info_dict['rxpowerlowwarning'], POWER_UNIT) - - dom_info_dict['txbiashighalarm'] = strip_unit_and_beautify(dom_info_dict['txbiashighalarm'], BIAS_UNIT) - dom_info_dict['txbiaslowalarm'] = strip_unit_and_beautify(dom_info_dict['txbiaslowalarm'], BIAS_UNIT) - dom_info_dict['txbiashighwarning'] = strip_unit_and_beautify(dom_info_dict['txbiashighwarning'], BIAS_UNIT) - dom_info_dict['txbiaslowwarning'] = strip_unit_and_beautify(dom_info_dict['txbiaslowwarning'], BIAS_UNIT) + for k, v in dom_info_dict.items(): + if re.search('temp', k) is not None: + dom_info_dict[k] = strip_unit_and_beautify(v, TEMP_UNIT) + elif re.search('vcc', k) is not None: + dom_info_dict[k] = strip_unit_and_beautify(v, VOLT_UNIT) + elif re.search('power', k) is not None: + dom_info_dict[k] = strip_unit_and_beautify(v, POWER_UNIT) + elif re.search('txbias', k) is not None: + dom_info_dict[k] = strip_unit_and_beautify(v, BIAS_UNIT) + elif type(v) is not str: + # For all the other keys: + dom_info_dict[k] = str(v) def beautify_transceiver_status_dict(transceiver_status_dict, physical_port): @@ -486,28 +474,7 @@ def post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, table, dom_th_info_cache[physical_port] = dom_info_dict if dom_info_dict is not None: beautify_dom_threshold_info_dict(dom_info_dict) - fvs = swsscommon.FieldValuePairs( - [('temphighalarm', dom_info_dict['temphighalarm']), - ('temphighwarning', dom_info_dict['temphighwarning']), - ('templowalarm', dom_info_dict['templowalarm']), - ('templowwarning', dom_info_dict['templowwarning']), - ('vcchighalarm', dom_info_dict['vcchighalarm']), - ('vcchighwarning', dom_info_dict['vcchighwarning']), - ('vcclowalarm', dom_info_dict['vcclowalarm']), - ('vcclowwarning', dom_info_dict['vcclowwarning']), - ('txpowerhighalarm', dom_info_dict['txpowerhighalarm']), - ('txpowerlowalarm', dom_info_dict['txpowerlowalarm']), - ('txpowerhighwarning', dom_info_dict['txpowerhighwarning']), - ('txpowerlowwarning', dom_info_dict['txpowerlowwarning']), - ('rxpowerhighalarm', dom_info_dict['rxpowerhighalarm']), - ('rxpowerlowalarm', dom_info_dict['rxpowerlowalarm']), - ('rxpowerhighwarning', dom_info_dict['rxpowerhighwarning']), - ('rxpowerlowwarning', dom_info_dict['rxpowerlowwarning']), - ('txbiashighalarm', dom_info_dict['txbiashighalarm']), - ('txbiaslowalarm', dom_info_dict['txbiaslowalarm']), - ('txbiashighwarning', dom_info_dict['txbiashighwarning']), - ('txbiaslowwarning', dom_info_dict['txbiaslowwarning']) - ]) + fvs = swsscommon.FieldValuePairs([(k, v) for k, v in dom_info_dict.items()]) table.set(port_name, fvs) else: return SFP_EEPROM_NOT_READY @@ -598,7 +565,7 @@ def post_port_sfp_dom_info_to_db(is_warm_start, port_mapping, xcvr_table_helper, rc = post_port_sfp_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_intf_tbl(asic_index), transceiver_dict, stop_event) if rc != SFP_EEPROM_NOT_READY: post_port_dom_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_dom_tbl(asic_index), stop_event) - post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_dom_tbl(asic_index), stop_event) + post_port_dom_threshold_info_to_db(logical_port_name, port_mapping, xcvr_table_helper.get_dom_threshold_tbl(asic_index), stop_event) update_port_transceiver_status_table_hw(logical_port_name, port_mapping, xcvr_table_helper.get_status_tbl(asic_index), @@ -617,13 +584,15 @@ def post_port_sfp_dom_info_to_db(is_warm_start, port_mapping, xcvr_table_helper, # Delete port dom/sfp info from db -def del_port_sfp_dom_info_from_db(logical_port_name, port_mapping, int_tbl, dom_tbl, pm_tbl): +def del_port_sfp_dom_info_from_db(logical_port_name, port_mapping, int_tbl, dom_tbl, dom_threshold_tbl, pm_tbl): for physical_port_name in get_physical_port_name_dict(logical_port_name, port_mapping).values(): try: if int_tbl: int_tbl._del(physical_port_name) if dom_tbl: dom_tbl._del(physical_port_name) + if dom_threshold_tbl: + dom_threshold_tbl._del(physical_port_name) if pm_tbl: pm_tbl._del(physical_port_name) @@ -1343,7 +1312,7 @@ def configure_tx_output_power(self, api, lport, tx_power): self.log_error("{} configured tx power {} > maximum power {} supported".format(lport, tx_power, max_p)) return api.set_tx_power(tx_power) - def configure_laser_frequency(self, api, lport, freq): + def configure_laser_frequency(self, api, lport, freq, grid=75): _, _, _, lowf, highf = api.get_supported_freq_config() if freq < lowf: self.log_error("{} configured freq:{} GHz is lower than the supported freq:{} GHz".format(lport, freq, lowf)) @@ -1354,7 +1323,7 @@ def configure_laser_frequency(self, api, lport, freq): self.log_error("{} configured freq:{} GHz is NOT in 75GHz grid".format(lport, freq)) if api.get_tuning_in_progress(): self.log_error("{} Tuning in progress, channel selection may fail!".format(lport)) - return api.set_laser_freq(freq) + return api.set_laser_freq(freq, grid) def wait_for_port_config_done(self, namespace): # Connect to APPL_DB and subscribe to PORT table notifications @@ -1694,7 +1663,7 @@ def task_worker(self): if not sfp_status_helper.detect_port_in_error_status(logical_port_name, self.xcvr_table_helper.get_status_tbl(asic_index)): post_port_dom_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index), self.task_stopping_event, dom_info_cache=dom_info_cache) - post_port_dom_threshold_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index), self.task_stopping_event, dom_th_info_cache=dom_th_info_cache) + post_port_dom_threshold_info_to_db(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), self.task_stopping_event, dom_th_info_cache=dom_th_info_cache) update_port_transceiver_status_table_hw(logical_port_name, self.port_mapping, self.xcvr_table_helper.get_status_tbl(asic_index), @@ -1733,6 +1702,7 @@ def on_remove_logical_port(self, port_change_event): self.port_mapping, None, self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id), + self.xcvr_table_helper.get_dom_threshold_tbl(port_change_event.asic_id), self.xcvr_table_helper.get_pm_tbl(port_change_event.asic_id)) delete_port_from_status_table_hw(port_change_event.port_name, self.port_mapping, @@ -1858,9 +1828,8 @@ def task_worker(self, stopping_event, sfp_error_event): timeout = RETRY_PERIOD_FOR_SYSTEM_READY_MSECS state = STATE_INIT sel, asic_context = port_mapping.subscribe_port_config_change(self.namespaces) - port_change_event_handler = functools.partial(self.on_port_config_change, stopping_event) while not stopping_event.is_set(): - port_mapping.handle_port_config_change(sel, asic_context, stopping_event, self.port_mapping, helper_logger, port_change_event_handler) + port_mapping.handle_port_config_change(sel, asic_context, stopping_event, self.port_mapping, helper_logger, self.on_port_config_change) # Retry those logical ports whose EEPROM reading failed or timeout when the SFP is inserted self.retry_eeprom_reading() @@ -1957,7 +1926,7 @@ def task_worker(self, stopping_event, sfp_error_event): if rc != SFP_EEPROM_NOT_READY: post_port_dom_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index)) - post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index)) + post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index)) update_port_transceiver_status_table_hw(logical_port, self.port_mapping, self.xcvr_table_helper.get_status_tbl(asic_index)) post_port_pm_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_pm_tbl(asic_index)) notify_media_setting(logical_port, transceiver_dict, self.xcvr_table_helper.get_app_port_tbl(asic_index), self.port_mapping) @@ -1970,6 +1939,7 @@ def task_worker(self, stopping_event, sfp_error_event): del_port_sfp_dom_info_from_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_intf_tbl(asic_index), self.xcvr_table_helper.get_dom_tbl(asic_index), + self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), self.xcvr_table_helper.get_pm_tbl(asic_index)) delete_port_from_status_table_hw(logical_port, self.port_mapping, self.xcvr_table_helper.get_status_tbl(asic_index)) else: @@ -1997,6 +1967,7 @@ def task_worker(self, stopping_event, sfp_error_event): self.port_mapping, None, self.xcvr_table_helper.get_dom_tbl(asic_index), + self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), self.xcvr_table_helper.get_pm_tbl(asic_index)) delete_port_from_status_table_hw(logical_port, self.port_mapping, self.xcvr_table_helper.get_status_tbl(asic_index)) except (TypeError, ValueError) as e: @@ -2073,6 +2044,7 @@ def on_remove_logical_port(self, port_change_event): self.port_mapping, self.xcvr_table_helper.get_intf_tbl(port_change_event.asic_id), self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id), + self.xcvr_table_helper.get_dom_threshold_tbl(port_change_event.asic_id), self.xcvr_table_helper.get_pm_tbl(port_change_event.asic_id)) delete_port_from_status_table_sw(port_change_event.port_name, self.xcvr_table_helper.get_status_tbl(port_change_event.asic_id)) delete_port_from_status_table_hw(port_change_event.port_name, @@ -2108,6 +2080,7 @@ def on_add_logical_port(self, port_change_event): status_tbl = self.xcvr_table_helper.get_status_tbl(port_change_event.asic_id) int_tbl = self.xcvr_table_helper.get_intf_tbl(port_change_event.asic_id) dom_tbl = self.xcvr_table_helper.get_dom_tbl(port_change_event.asic_id) + dom_threshold_tbl = self.xcvr_table_helper.get_dom_threshold_tbl(port_change_event.asic_id) pm_tbl = self.xcvr_table_helper.get_pm_tbl(port_change_event.asic_id) physical_port_list = self.port_mapping.logical_port_name_to_physical_port_list(port_change_event.port_name) @@ -2136,6 +2109,9 @@ def on_add_logical_port(self, port_change_event): found, dom_info = dom_tbl.get(sibling_port) if found: dom_tbl.set(port_change_event.port_name, dom_info) + found, dom_threshold_info = dom_threshold_tbl.get(sibling_port) + if found: + dom_threshold_tbl.set(port_change_event.port_name, dom_threshold_info) else: error_description = 'N/A' status = None @@ -2171,7 +2147,7 @@ def on_add_logical_port(self, port_change_event): self.retry_eeprom_set.add(port_change_event.port_name) else: post_port_dom_info_to_db(port_change_event.port_name, self.port_mapping, dom_tbl) - post_port_dom_threshold_info_to_db(port_change_event.port_name, self.port_mapping, dom_tbl) + post_port_dom_threshold_info_to_db(port_change_event.port_name, self.port_mapping, dom_threshold_tbl) update_port_transceiver_status_table_hw(port_change_event.port_name, self.port_mapping, status_tbl) @@ -2204,7 +2180,7 @@ def retry_eeprom_reading(self): rc = post_port_sfp_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_intf_tbl(asic_index), transceiver_dict) if rc != SFP_EEPROM_NOT_READY: post_port_dom_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index)) - post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_tbl(asic_index)) + post_port_dom_threshold_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_dom_threshold_tbl(asic_index)) update_port_transceiver_status_table_hw(logical_port, self.port_mapping, self.xcvr_table_helper.get_status_tbl(asic_index)) post_port_pm_info_to_db(logical_port, self.port_mapping, self.xcvr_table_helper.get_pm_tbl(asic_index)) notify_media_setting(logical_port, transceiver_dict, self.xcvr_table_helper.get_app_port_tbl(asic_index), self.port_mapping) @@ -2358,6 +2334,7 @@ def deinit(self): del_port_sfp_dom_info_from_db(logical_port_name, port_mapping_data, self.xcvr_table_helper.get_intf_tbl(asic_index), self.xcvr_table_helper.get_dom_tbl(asic_index), + self.xcvr_table_helper.get_dom_threshold_tbl(asic_index), self.xcvr_table_helper.get_pm_tbl(asic_index)) delete_port_from_status_table_sw(logical_port_name, self.xcvr_table_helper.get_status_tbl(asic_index)) delete_port_from_status_table_hw(logical_port_name, port_mapping_data, self.xcvr_table_helper.get_status_tbl(asic_index)) @@ -2416,7 +2393,7 @@ def run(self): class XcvrTableHelper: def __init__(self, namespaces): - self.int_tbl, self.dom_tbl, self.status_tbl, self.app_port_tbl, \ + self.int_tbl, self.dom_tbl, self.dom_threshold_tbl, self.status_tbl, self.app_port_tbl, \ self.cfg_port_tbl, self.state_port_tbl, self.pm_tbl = {}, {}, {}, {}, {}, {}, {} self.state_db = {} self.cfg_db = {} @@ -2425,6 +2402,7 @@ def __init__(self, namespaces): self.state_db[asic_id] = daemon_base.db_connect("STATE_DB", namespace) self.int_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], TRANSCEIVER_INFO_TABLE) self.dom_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], TRANSCEIVER_DOM_SENSOR_TABLE) + self.dom_threshold_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], TRANSCEIVER_DOM_THRESHOLD_TABLE) self.status_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], TRANSCEIVER_STATUS_TABLE) self.pm_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], TRANSCEIVER_PM_TABLE) self.state_port_tbl[asic_id] = swsscommon.Table(self.state_db[asic_id], swsscommon.STATE_PORT_TABLE_NAME) @@ -2439,6 +2417,9 @@ def get_intf_tbl(self, asic_id): def get_dom_tbl(self, asic_id): return self.dom_tbl[asic_id] + def get_dom_threshold_tbl(self, asic_id): + return self.dom_threshold_tbl[asic_id] + def get_status_tbl(self, asic_id): return self.status_tbl[asic_id] diff --git a/sonic-ycabled/tests/test_y_cable_helper.py b/sonic-ycabled/tests/test_y_cable_helper.py index 752f99129..43226d33b 100644 --- a/sonic-ycabled/tests/test_y_cable_helper.py +++ b/sonic-ycabled/tests/test_y_cable_helper.py @@ -5798,3 +5798,25 @@ def test_apply_grpc_secrets_configuration(self, open): patched_util.return_value = parsed_data rc = apply_grpc_secrets_configuration(None) assert(rc == None) + + + + def test_handle_ycable_active_standby_probe_notification(self): + + test_db = "TEST_DB" + status = True + port_m = "Ethernet0" + fvp_m = [('command', "probe"), ('read_side', 1), ('cable_type','active-standby'), ('soc_ipv4','192.168.0.1')] + fvp_dict = {"command": "probe"} + hw_mux_cable_tbl = {} + y_cable_response_tbl = {} + asic_index = 0 + hw_mux_cable_tbl[asic_index] = swsscommon.Table( + test_db[asic_index], "PORT_INFO_TABLE") + y_cable_response_tbl[asic_index] = swsscommon.Table( + test_db[asic_index], "PORT_INFO_TABLE") + hw_mux_cable_tbl[asic_index].get.return_value = (status, fvp_m) + + rc = handle_ycable_active_standby_probe_notification("active-standby", fvp_dict, test_db, hw_mux_cable_tbl, port_m, asic_index, y_cable_response_tbl) + assert(rc == True) + diff --git a/sonic-ycabled/tests/test_ycable.py b/sonic-ycabled/tests/test_ycable.py index 29e7263ae..864b40865 100644 --- a/sonic-ycabled/tests/test_ycable.py +++ b/sonic-ycabled/tests/test_ycable.py @@ -8,6 +8,7 @@ import os import sys import time +import traceback if sys.version_info >= (3, 3): from unittest.mock import MagicMock, patch @@ -40,20 +41,20 @@ def test_ycable_info_helper_class_run(self, mocked_sleep): with patch('ycable.ycable.platform_sfputil') as patched_util: patched_util.logical.return_value = ['Ethernet0', 'Ethernet4'] patched_util.get_asic_id_for_logical_port.return_value = 0 - Y_cable_state_task = YcableStateUpdateTask() - Y_cable_state_task.task_process = MagicMock() - Y_cable_state_task.task_stopping_event = MagicMock() + y_cable_presence = [True] stopping_event = MagicMock() sfp_error_event = MagicMock() - y_cable_presence = [True] - Y_cable_state_task.task_run(sfp_error_event, y_cable_presence) - Y_cable_state_task.task_stop() - Y_cable_task = YcableInfoUpdateTask() + Y_cable_state_task = YcableStateUpdateTask(sfp_error_event, y_cable_presence) + Y_cable_state_task.task_process = MagicMock() + Y_cable_state_task.task_stopping_event = MagicMock() + Y_cable_state_task.start() + Y_cable_state_task.join() + Y_cable_task = YcableInfoUpdateTask(y_cable_presence) Y_cable_task.task_thread = MagicMock() Y_cable_task.task_stopping_event = MagicMock() Y_cable_task.task_stopping_event.is_set = MagicMock() - Y_cable_task.task_run(y_cable_presence) - Y_cable_task.task_stop() + Y_cable_task.start() + Y_cable_task.join() Y_cable_state_task.task_stopping_event.return_value.is_set.return_value = True #Y_cable_state_task.task_worker(stopping_event, sfp_error_event, y_cable_presence) # For now just check if exception is thrown for UT purposes @@ -67,19 +68,20 @@ def test_ycable_info_helper_class_run(self, mocked_sleep): @patch("swsscommon.swsscommon.Select.select", MagicMock()) def test_ycable_helper_class_run_loop(self): Y_cable_task = YCableTableUpdateTask() + Y_cable_cli_task = YCableCliUpdateTask() Y_cable_task.task_stopping_event = MagicMock() + Y_cable_cli_task.task_stopping_event = MagicMock() Y_cable_task.task_thread = MagicMock() Y_cable_task.task_thread.start = MagicMock() Y_cable_task.task_thread.join = MagicMock() - Y_cable_task.task_cli_thead = MagicMock() - Y_cable_task.task_cli_thead.start = MagicMock() - Y_cable_task.task_cli_thead.join = MagicMock() #Y_cable_task.task_stopping_event.return_value.is_set.return_value = False swsscommon.SubscriberStateTable.return_value.pop.return_value = (True, True, {"read_side": "2"}) Y_cable_task.task_worker() - Y_cable_task.task_cli_worker() - Y_cable_task.task_run() - Y_cable_task.task_stop() + Y_cable_task.start() + Y_cable_task.join() + Y_cable_cli_task.task_cli_worker() + Y_cable_cli_task.start() + Y_cable_cli_task.join() @patch("swsscommon.swsscommon.Select", MagicMock()) @patch("swsscommon.swsscommon.Select.addSelectable", MagicMock()) @@ -89,14 +91,10 @@ def test_ycable_helper_class_run(self): Y_cable_task.task_thread = MagicMock() Y_cable_task.task_thread.start = MagicMock() Y_cable_task.task_thread.join = MagicMock() - Y_cable_task.task_cli_thead = MagicMock() - Y_cable_task.task_cli_thead.start = MagicMock() - Y_cable_task.task_cli_thead.join = MagicMock() Y_cable_task.task_stopping_event.return_value.is_set.return_value = True Y_cable_task.task_worker() - Y_cable_task.task_cli_worker() - Y_cable_task.task_run() - Y_cable_task.task_stop() + Y_cable_task.start() + Y_cable_task.join() def test_detect_port_in_error_status(self): @@ -291,9 +289,7 @@ def test_DaemonYcable_init_deinit(self): @patch('ycable.ycable.platform_sfputil', MagicMock()) @patch('ycable.ycable.DaemonYcable.load_platform_util', MagicMock()) @patch('ycable.ycable.YcableInfoUpdateTask', MagicMock()) - @patch('ycable.ycable.YcableInfoUpdateTask.task_run', MagicMock()) @patch('ycable.ycable.YcableStateUpdateTask', MagicMock()) - @patch('ycable.ycable.YcableStateUpdateTask.task_run', MagicMock()) @patch('ycable.ycable_utilities.y_cable_helper.init_ports_status_for_y_cable', MagicMock()) def test_DaemonYcable_init_deinit_full(self): ycable = DaemonYcable(SYSLOG_IDENTIFIER) @@ -327,3 +323,45 @@ def wait_until(total_wait_time, interval, call_back, *args, **kwargs): time.sleep(interval) wait_time += interval return False + + +class TestYcableScriptException(object): + + @patch("swsscommon.swsscommon.Select", MagicMock(side_effect=NotImplementedError)) + @patch("swsscommon.swsscommon.Select.addSelectable", MagicMock(side_effect=NotImplementedError)) + @patch("swsscommon.swsscommon.Select.select", MagicMock(side_effect=NotImplementedError)) + def test_ycable_helper_class_run_loop_with_exception(self): + + + + Y_cable_cli_task = YCableCliUpdateTask() + expected_exception_start = None + expected_exception_join = None + trace = None + try: + Y_cable_cli_task.start() + Y_cable_cli_task.task_cli_worker() + except Exception as e1: + expected_exception_start = e1 + trace = traceback.format_exc() + + + try: + Y_cable_cli_task.join() + except Exception as e2: + expected_exception_join = e2 + + """ + #Handy debug Helpers or else use import logging + #f = open("newfile", "w") + #f.write(format(e2)) + #f.write(format(m1)) + #f.write(trace) + """ + + assert(type(expected_exception_start) == type(expected_exception_join)) + assert(expected_exception_start.args == expected_exception_join.args) + assert("NotImplementedError" in str(trace) and "effect" in str(trace)) + assert("sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py" in str(trace)) + assert("swsscommon.Select" in str(trace)) + diff --git a/sonic-ycabled/ycable/ycable.py b/sonic-ycabled/ycable/ycable.py index 90d71ee58..fd3e0ecf5 100644 --- a/sonic-ycabled/ycable/ycable.py +++ b/sonic-ycabled/ycable/ycable.py @@ -6,10 +6,12 @@ """ try: + import os import signal import sys import time import threading + import traceback from enum import Enum from sonic_py_common import daemon_base, device_info, logger @@ -94,10 +96,14 @@ def handle_state_update_task(port, fvp_dict, y_cable_presence, stopping_event): # Thread wrapper class to update ycable info periodically -class YcableInfoUpdateTask(object): - def __init__(self): - self.task_thread = None + +class YcableInfoUpdateTask(threading.Thread): + + def __init__(self, y_cable_presence): + threading.Thread.__init__(self) + self.exc = None self.task_stopping_event = threading.Event() + self.y_cable_presence = y_cable_presence self.table_helper = y_cable_table_helper.YcableInfoUpdateTableHelper() @@ -122,25 +128,36 @@ def task_worker(self, y_cable_presence): helper_logger.log_info("Stop DOM monitoring loop") - def task_run(self, y_cable_presence): + def run(self): if self.task_stopping_event.is_set(): return - self.task_thread = threading.Thread(target=self.task_worker, args=(y_cable_presence,)) - self.task_thread.start() + try: + self.task_worker(self.y_cable_presence) + except Exception as e: + helper_logger.log_error("Exception occured at child thread YcableInfoUpdateTask due to {} {}".format(repr(e), traceback.format_exc())) + + self.exc = e - def task_stop(self): - self.task_stopping_event.set() - self.task_thread.join() + def join(self): + threading.Thread.join(self) + + if self.exc: + raise self.exc # Process wrapper class to update sfp state info periodically -class YcableStateUpdateTask(object): - def __init__(self): - self.task_process = None + + +class YcableStateUpdateTask(threading.Thread): + def __init__(self, sfp_error_event, y_cable_presence): + threading.Thread.__init__(self) + self.exc = None self.task_stopping_event = threading.Event() self.sfp_insert_events = {} + self.sfp_error_event = sfp_error_event + self.y_cable_presence = y_cable_presence self.table_helper = y_cable_table_helper.YcableStateUpdateTableHelper() @@ -192,18 +209,21 @@ def task_worker(self, stopping_event, sfp_error_event, y_cable_presence): handle_state_update_task(port, fvp_dict, y_cable_presence, stopping_event) - - def task_run(self, sfp_error_event, y_cable_presence): + def run(self): if self.task_stopping_event.is_set(): return - self.task_process = threading.Thread(target=self.task_worker, args=( - self.task_stopping_event, sfp_error_event, y_cable_presence)) - self.task_process.start() + try: + self.task_worker(self.task_stopping_event, self.sfp_error_event, self.y_cable_presence) + except Exception as e: + helper_logger.log_error("Exception occured at child thread YcableStateUpdateTask due to {} {}".format(repr(e), traceback.format_exc())) + self.exc = e + + def join(self): + threading.Thread.join(self) - def task_stop(self): - self.task_stopping_event.set() - self.task_process.join() + if self.exc: + raise self.exc # # Daemon ======================================================================= @@ -220,6 +240,7 @@ def __init__(self, log_identifier): self.sfp_error_event = threading.Event() self.y_cable_presence = [False] self.table_helper = y_cable_table_helper.DaemonYcableTableHelper() + self.threads = [] # Signal handler def signal_handler(self, sig, frame): @@ -349,36 +370,58 @@ def run(self): self.init() # Start the ycable task update thread - ycable_info_update = YcableInfoUpdateTask() - ycable_info_update.task_run(self.y_cable_presence) + ycable_info_update = YcableInfoUpdateTask(self.y_cable_presence) + ycable_info_update.start() + self.threads.append(ycable_info_update) # Start the sfp state info update process - ycable_state_update = YcableStateUpdateTask() - ycable_state_update.task_run(self.sfp_error_event, self.y_cable_presence) + ycable_state_update = YcableStateUpdateTask(self.sfp_error_event, self.y_cable_presence) + ycable_state_update.start() + self.threads.append(ycable_state_update) # Start the Y-cable state info update process if Y cable presence established y_cable_state_worker_update = None if self.y_cable_presence[0] is True: y_cable_state_worker_update = y_cable_helper.YCableTableUpdateTask() - y_cable_state_worker_update.task_run() + y_cable_state_worker_update.start() + self.threads.append(y_cable_state_worker_update) + y_cable_cli_worker_update = y_cable_helper.YCableCliUpdateTask() + y_cable_cli_worker_update.start() + self.threads.append(y_cable_cli_worker_update) # Start main loop self.log_info("Start daemon main loop") while not self.stop_event.wait(self.timeout): self.log_info("Ycable main loop") + # check all threads are alive + for thread in self.threads: + if thread.is_alive() is False: + try: + thread.join() + except Exception as e: + self.log_error("Exception occured at child thread {} to {}".format(thread.getName(), repr(e))) + self.log_error("thread id {} is not running, exiting main loop".format(thread.getName())) + os.kill(os.getpid(), signal.SIGKILL) - self.log_info("Stop daemon main loop") + + self.log_error("Stop daemon main loop") # Stop the ycable periodic info info update thread - ycable_info_update.task_stop() + if ycable_info_update.is_alive(): + ycable_info_update.join() # Stop the ycable update process - ycable_state_update.task_stop() + if ycable_state_update.is_alive(): + ycable_state_update.join() # Stop the Y-cable state info update process if self.y_cable_presence[0] is True: - y_cable_state_worker_update.task_stop() + if y_cable_state_worker_update.is_alive(): + y_cable_state_worker_update.join() + if y_cable_cli_worker_update.is_alive(): + y_cable_cli_worker_update.join() + # Start daemon deinitialization sequence self.deinit() diff --git a/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py b/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py index c9d53823f..1e03633c5 100644 --- a/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py +++ b/sonic-ycabled/ycable/ycable_utilities/y_cable_helper.py @@ -11,6 +11,7 @@ import sys import threading import time +import traceback from importlib import import_module @@ -3410,6 +3411,29 @@ def handle_hw_mux_cable_table_grpc_notification(fvp, hw_mux_cable_tbl, asic_inde helper_logger.log_info("Got a change event on port {} of table {} that does not contain state".format( port, swsscommon.APP_HW_MUX_CABLE_TABLE_NAME)) + +def handle_ycable_active_standby_probe_notification(cable_type, fvp_dict, appl_db, hw_mux_cable_tbl, port_m, asic_index, y_cable_response_tbl): + + if cable_type == 'active-standby' and "command" in fvp_dict: + + # check if xcvrd got a probe command + probe_identifier = fvp_dict["command"] + + if probe_identifier == "probe": + + (status, fv) = hw_mux_cable_tbl[asic_index].get(port_m) + + if status is False: + helper_logger.log_warning("Could not retreive fieldvalue pairs for {}, inside state_db table {}".format( + port_m, hw_mux_cable_tbl[asic_index].getTableName())) + return False + + mux_port_dict = dict(fv) + read_side = mux_port_dict.get("read_side") + update_appdb_port_mux_cable_response_table(port_m, asic_index, appl_db, int(read_side), y_cable_response_tbl) + + return True + def handle_ycable_enable_disable_tel_notification(fvp_m, key): global disable_telemetry @@ -3441,16 +3465,15 @@ def handle_ycable_enable_disable_tel_notification(fvp_m, key): disable_telemetry = False # Thread wrapper class to update y_cable status periodically -class YCableTableUpdateTask(object): +class YCableTableUpdateTask(threading.Thread): def __init__(self): - self.task_thread = None - self.task_cli_thread = None - self.task_download_firmware_thread = {} + threading.Thread.__init__(self) + + self.exc = None self.task_stopping_event = threading.Event() self.hw_mux_cable_tbl_keys = {} self.table_helper = y_cable_table_helper.YcableTableUpdateTableHelper() - self.cli_table_helper = y_cable_table_helper.YcableCliUpdateTableHelper() def task_worker(self): @@ -3576,21 +3599,8 @@ def task_worker(self): (status, cable_type) = check_mux_cable_port_type(port_m, self.table_helper.get_port_tbl(), asic_index) if status: + handle_ycable_active_standby_probe_notification(cable_type, fvp_dict, self.table_helper.get_appl_db(), self.table_helper.get_hw_mux_cable_tbl(), port_m, asic_index, self.table_helper.get_y_cable_response_tbl()) - if cable_type == 'active-standby' and "command" in fvp_dict: - - # check if xcvrd got a probe command - probe_identifier = fvp_dict["command"] - - if probe_identifier == "probe": - (status, fv) = self.table_helper.get_hw_mux_cable_tbl()[asic_index].get(port_m) - if status is False: - helper_logger.log_warning("Could not retreive fieldvalue pairs for {}, inside state_db table {}".format( - port_m, self.table_helper.get_hw_mux_cable_tbl()[asic_index].getTableName())) - continue - mux_port_dict = dict(fv) - read_side = mux_port_dict.get("read_side") - update_appdb_port_mux_cable_response_table(port_m, asic_index, self.appl_db, int(read_side), self.table_helper.get_y_cable_response_tbl()) while True: (port_m, op_m, fvp_m) = self.table_helper.get_fwd_state_command_tbl()[asic_index].pop() @@ -3622,6 +3632,32 @@ def task_worker(self): handle_hw_mux_cable_table_grpc_notification( fvp_n, self.table_helper.get_hw_mux_cable_tbl_peer(), asic_index, self.table_helper.get_mux_metrics_tbl(), True, port_n) + def run(self): + if self.task_stopping_event.is_set(): + return + + try: + self.task_worker() + except Exception as e: + helper_logger.log_error("Exception occured at child thread YCableTableUpdateTask due to {} {}".format(repr(e), traceback.format_exc())) + self.exc = e + + + def join(self): + threading.Thread.join(self) + + if self.exc: + raise self.exc + +class YCableCliUpdateTask(threading.Thread): + def __init__(self): + threading.Thread.__init__(self) + + self.exc = None + self.task_download_firmware_thread = {} + self.task_stopping_event = threading.Event() + self.cli_table_helper = y_cable_table_helper.YcableCliUpdateTableHelper() + def task_cli_worker(self): @@ -3825,19 +3861,25 @@ def task_cli_worker(self): break - def task_run(self): - self.task_thread = threading.Thread(target=self.task_worker) - self.task_cli_thread = threading.Thread(target=self.task_cli_worker) - self.task_thread.start() - self.task_cli_thread.start() - - def task_stop(self): - - self.task_stopping_event.set() - helper_logger.log_info("stopping the cli and probing task threads xcvrd") - self.task_thread.join() - self.task_cli_thread.join() + def run(self): + if self.task_stopping_event.is_set(): + return + try: + self.task_cli_worker() + except Exception as e: + helper_logger.log_error("Exception occured at child thread YcableCliUpdateTask due to {} {}".format(repr(e), traceback.format_exc())) + self.exc = e + + def join(self): + + threading.Thread.join(self) + for key, value in self.task_download_firmware_thread.items(): self.task_download_firmware_thread[key].join() helper_logger.log_info("stopped all thread") + if self.exc is not None: + + raise self.exc + +