Skip to content

Commit

Permalink
[pcied] Add PCIe AER stats collection (sonic-net#100)
Browse files Browse the repository at this point in the history
In pcied, add support for collecting the AER stats of each severity from AER-capable PCIe devices and updating them in STATE_DB.

The key used to represent a PCIe device when storing its AER stats in STATE_DB has the format PCIE_DEVICE|<Bus>:<Dev>.<Fn>.
For every device, the AER stats are stored as key-value pairs whose keys have the format <severity>|<AER error type>; the device ID is stored under the key id.

HLD: sonic-net/SONiC#678, sonic-net/SONiC#720
Depends on: sonic-net/sonic-platform-common#144
  • Loading branch information
ArunSaravananBalachandran authored Jan 26, 2021
1 parent e72f6cd commit 1fcaa57
Showing 1 changed file with 47 additions and 4 deletions.
51 changes: 47 additions & 4 deletions sonic-pcied/scripts/pcied
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,12 @@
try:
import os
import signal
import subprocess
import sys
import threading

import swsssdk
from sonic_py_common.daemon_base import DaemonBase
from sonic_py_common import device_info
from sonic_py_common import daemon_base, device_info
from swsscommon import swsscommon
except ImportError as e:
raise ImportError(str(e) + " - required module not found")

Expand All @@ -25,6 +24,7 @@ SYSLOG_IDENTIFIER = "pcied"

PCIE_RESULT_REGEX = "PCIe Device Checking All Test"
PCIE_TABLE_NAME = "PCIE_STATUS"
PCIE_DEVICE_TABLE_NAME = "PCIE_DEVICE"

PCIE_CONF_FILE = 'pcie.yaml'

Expand All @@ -36,7 +36,7 @@ REDIS_HOSTIP = "127.0.0.1"
#


class DaemonPcied(DaemonBase):
class DaemonPcied(daemon_base.DaemonBase):
def __init__(self, log_identifier):
super(DaemonPcied, self).__init__(log_identifier)

Expand All @@ -52,6 +52,31 @@ class DaemonPcied(DaemonBase):

self.state_db = swsssdk.SonicV2Connector(host=REDIS_HOSTIP)
self.state_db.connect("STATE_DB")
state_db = daemon_base.db_connect("STATE_DB")
self.device_table = swsscommon.Table(state_db, PCIE_DEVICE_TABLE_NAME)

# Load AER fields into STATE_DB
def update_aer_to_statedb(self, device_name, aer_stats):
    """Write a device's AER counters to the PCIe device table.

    Every counter is stored under a field named "<severity>|<counter>",
    with the severities visited in the order correctable, fatal,
    non_fatal. When no counters are found, nothing is written and a
    debug message is logged instead.

    Args:
        device_name: Key of the device's entry in self.device_table.
        aer_stats: Mapping of severity name to a mapping of counter name
            to value. A severity that is absent (or None) is treated as
            having no counters.
    """
    aer_fields = {}
    for severity in ("correctable", "fatal", "non_fatal"):
        # Tolerate a missing severity rather than raising KeyError.
        counters = aer_stats.get(severity) or {}
        for field, value in counters.items():
            aer_fields[severity + "|" + field] = value

    if not aer_fields:
        self.log_debug("PCIe device {} has no AER attributes".format(device_name))
        return

    formatted_fields = swsscommon.FieldValuePairs(list(aer_fields.items()))
    self.device_table.set(device_name, formatted_fields)

# Check the PCIe devices
def check_pcie_devices(self):
Expand Down Expand Up @@ -84,6 +109,24 @@ class DaemonPcied(DaemonBase):
self.update_state_db("PCIE_DEVICES", "status", "PASSED")
self.log_info("PCIe device status check : PASSED")

# update AER-attributes to DB
for item in resultInfo:
if item["result"] == "Failed":
continue

Bus = int(item["bus"], 16)
Dev = int(item["dev"], 16)
Fn = int(item["fn"], 16)

device_name = "%02x:%02x.%d" % (Bus, Dev, Fn)
dev_id_path = '/sys/bus/pci/devices/0000:%s/device' % device_name
with open(dev_id_path, 'r') as fd:
Id = fd.read().strip()

self.device_table.set(device_name, [('id', Id)])
aer_stats = platform_pcieutil.get_pcie_aer_stats(bus=Bus, device=Dev, func=Fn)
self.update_aer_to_statedb(device_name, aer_stats)

def read_state_db(self, key1, key2):
    """Look up (key1, key2) in STATE_DB via self.state_db and return the result."""
    db_name = 'STATE_DB'
    value = self.state_db.get(db_name, key1, key2)
    return value

Expand Down

0 comments on commit 1fcaa57

Please sign in to comment.