Skip to content

Commit

Permalink
udev: improve comparison between candidate and existing controllers
Browse files Browse the repository at this point in the history
This is so that nvme-stas can better identify existing controller
connections that can be reused for new candidate controllers.

Signed-off-by: Martin Belanger <martin.belanger@dell.com>
  • Loading branch information
Martin Belanger committed May 19, 2023
1 parent 441cced commit f37795d
Show file tree
Hide file tree
Showing 4 changed files with 314 additions and 19 deletions.
2 changes: 1 addition & 1 deletion staslib/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def hostkey(self):
try:
value = self.__get_value('Host', 'key', defs.NVME_HOSTKEY)
except FileNotFoundError as ex:
logging.info('Host key undefined: %s', ex)
logging.debug('Host key undefined: %s', ex)
value = None

return value
Expand Down
164 changes: 162 additions & 2 deletions staslib/iputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# Size in bytes of a route attribute (rtattr) header. The parsers in this
# module unpack it with struct format '<HH' as (rta_len, rta_type).
RTATTR_SZ = 4
# Round an attribute length up to the next multiple of 4.
RTA_ALIGN = lambda length: ((length + 3) & ~3) # pylint: disable=unnecessary-lambda-assignment
# rtattr type whose payload is parsed as an IP address by this module.
IFLA_ADDRESS = 1
# rtattr type whose payload is parsed as a NUL-padded interface name by this module.
IFLA_IFNAME = 3


def _nlmsghdr(nlmsg_type, nlmsg_flags, nlmsg_seq, nlmsg_pid, msg_len: int):
Expand Down Expand Up @@ -193,17 +194,176 @@ def _iface_of(src_addr): # pylint: disable=too-many-locals


# ******************************************************************************
def get_ipaddress_obj(ipaddr, ipv4_mapped_convert=False):
    '''@brief Return an IPv4Address or IPv6Address depending on whether @ipaddr
    is a valid IPv4 or IPv6 address. Return None otherwise.
    @param ipaddr: The address to parse. Anything accepted by
                   ipaddress.ip_address() (e.g. a string or packed bytes).
    @param ipv4_mapped_convert: When True, an IPv6 address that is
                   IPv4-Mapped (e.g. ::ffff:10.0.0.1) is converted to its
                   IPv4 equivalent before being returned.
    @return: An ipaddress.IPv4Address or ipaddress.IPv6Address object, or
             None when @ipaddr is not a valid IP address.
    '''
    try:
        ip = ipaddress.ip_address(ipaddr)
    except ValueError:
        return None

    if ipv4_mapped_convert:
        # Only IPv6Address objects have the "ipv4_mapped" attribute, and it
        # is None unless the address really is IPv4-Mapped.
        ipv4_mapped = getattr(ip, 'ipv4_mapped', None)
        if ipv4_mapped is not None:
            ip = ipv4_mapped

    return ip


# ******************************************************************************
def get_primary_src_addrs(iface: str):  # pylint: disable=too-many-locals, too-many-branches
    '''@brief Look up the primary IPv4 and IPv6 addresses of interface @iface.
    The addresses come from a Netlink address dump (GETADDRCMD). The first
    address seen for a given family on the interface is the one returned.
    @param iface: Name of the interface of interest
    @return: tuple(primary-ipv4-addr-or-None, primary-ipv6-addr-or-None)
    '''

    def primary_pair(entry):
        '''Return (ipv4, ipv6) when @entry has an address of each family, else None.'''
        v4_addrs = entry.get(socket.AF_INET, [])
        v6_addrs = entry.get(socket.AF_INET6, [])
        if len(v4_addrs) and len(v6_addrs):
            return (v4_addrs[0], v6_addrs[0])
        return None

    match_indx = None
    known = {}  # ifa_index -> {'name': ifname, AF_INET: [addrs], AF_INET6: [addrs]}
    with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW) as nlsock:
        nlsock.sendall(GETADDRCMD)
        buf = nlsock.recv(8192)
        pos = 0
        while True:  # pylint: disable=too-many-nested-blocks
            if pos >= len(buf):
                buf += nlsock.recv(8192)

            msg_len, msg_type, _, _, _ = struct.unpack('<LHHLL', buf[pos : pos + NLMSG_HDRLEN])

            if msg_type == NLMSG_DONE:
                break

            if msg_type == RTM_NEWADDR:
                body = pos + NLMSG_HDRLEN
                family, _, _, _, index = struct.unpack('<BBBBL', buf[body : body + IFADDRMSG_SZ])  # ifaddrmsg
                entry = known.setdefault(index, {})

                msg_end = pos + msg_len
                attr = body + IFADDRMSG_SZ
                while attr < msg_end:
                    attr_len, attr_type = struct.unpack('<HH', buf[attr : attr + RTATTR_SZ])
                    payload = buf[attr + RTATTR_SZ : attr + attr_len]
                    found = None

                    if attr_type == IFLA_IFNAME:
                        ifname = payload.rstrip(b'\0').decode()
                        entry['name'] = ifname
                        if ifname == iface:
                            match_indx = index
                            found = primary_pair(entry)

                    elif attr_type == IFLA_ADDRESS:
                        ip = get_ipaddress_obj(payload)
                        if ip:
                            entry.setdefault(family, []).append(ip)
                            if entry.get('name') == iface:
                                found = primary_pair(entry)

                    # Stop parsing as soon as both primary addresses are known.
                    if found is not None:
                        return found

                    attr += RTA_ALIGN(attr_len)  # Next rtattr starts on a multiple of 4

            pos += msg_len  # Move to next Netlink message

    if match_indx is not None:
        # The interface was found, but it lacks an address for at least one
        # family. Report None for the missing family (or families).
        entry = known[match_indx]
        v4_addrs = entry.get(socket.AF_INET, [None])
        v6_addrs = entry.get(socket.AF_INET6, [None])
        return (v4_addrs[0], v6_addrs[0])

    return None, None


# ******************************************************************************
def net_if_addrs():  # pylint: disable=too-many-locals
    '''@brief Return a dictionary listing every IP addresses for each interface.
    The first IP address of a list is the primary address used as the default
    source address.

    The addresses are stored as ipaddress.IPv4Address/IPv6Address objects
    (as returned by get_ipaddress_obj()). They are shown below in their
    string form for readability. Interfaces for which no name was reported
    are left out of the result.

    @return: dict keyed by interface name. Each value is a dict with the two
             keys 4 and 6 holding the list of IPv4 and IPv6 addresses.
    @example: {
        'wlp0s20f3': {
            4: ['10.0.0.28'],
            6: [
                'fd5e:9a9e:c5bd:0:5509:890c:1848:3843',
                'fd5e:9a9e:c5bd:0:1fd5:e527:8df7:7912',
                '2605:59c8:6128:fb00:c083:1b8:c467:81d2',
                '2605:59c8:6128:fb00:e99d:1a02:38e0:ad52',
                'fe80::d71b:e807:d5ee:7614'
            ],
        },
        'lo': {
            4: ['127.0.0.1'],
            6: ['::1'],
        },
        'docker0': {
            4: ['172.17.0.1'],
            6: []
        },
    }
    '''
    interfaces = {}  # ifa_index -> {'name': ifname, 4: [addrs], 6: [addrs]}
    with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW) as sock:
        sock.sendall(GETADDRCMD)
        nlmsg = sock.recv(8192)
        nlmsg_idx = 0
        while True:  # pylint: disable=too-many-nested-blocks
            if nlmsg_idx >= len(nlmsg):
                nlmsg += sock.recv(8192)

            nlmsghdr = nlmsg[nlmsg_idx : nlmsg_idx + NLMSG_HDRLEN]
            nlmsg_len, nlmsg_type, _, _, _ = struct.unpack('<LHHLL', nlmsghdr)

            if nlmsg_type == NLMSG_DONE:
                break

            if nlmsg_type == RTM_NEWADDR:
                msg_indx = nlmsg_idx + NLMSG_HDRLEN
                msg = nlmsg[msg_indx : msg_indx + IFADDRMSG_SZ]  # ifaddrmsg
                ifa_family, _, _, _, ifa_index = struct.unpack('<BBBBL', msg)

                # Only IPv4/IPv6 messages are parsed. Skipping every other
                # family guarantees that the interface entry exists before
                # it is indexed below, and that a non-IP address can never
                # be filed under family 6.
                if ifa_family in (socket.AF_INET, socket.AF_INET6):
                    entry = interfaces.setdefault(ifa_index, {4: [], 6: []})
                    family = 4 if ifa_family == socket.AF_INET else 6

                    rtattr_indx = msg_indx + IFADDRMSG_SZ
                    while rtattr_indx < (nlmsg_idx + nlmsg_len):
                        rtattr = nlmsg[rtattr_indx : rtattr_indx + RTATTR_SZ]
                        rta_len, rta_type = struct.unpack('<HH', rtattr)

                        if rta_type == IFLA_IFNAME:
                            data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
                            entry['name'] = data.rstrip(b'\0').decode()

                        elif rta_type == IFLA_ADDRESS:
                            data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
                            ip = get_ipaddress_obj(data)
                            if ip:
                                entry[family].append(ip)

                        rta_len = RTA_ALIGN(rta_len)  # Round up to multiple of 4
                        rtattr_indx += rta_len  # Move to next rtattr

            nlmsg_idx += nlmsg_len  # Move to next Netlink message

    # Re-key the result by interface name instead of interface index.
    if_addrs = {}
    for value in interfaces.values():
        name = value.pop('name', None)
        if name is not None:
            if_addrs[name] = value

    return if_addrs


# ******************************************************************************
def get_interface(src_addr):
'''Get interface for given source address
Expand Down
159 changes: 147 additions & 12 deletions staslib/udev.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,136 @@ def is_ioc_device(device):

return False

@staticmethod
def _cid_matches_tid_legacy(cid, tid):  # pylint: disable=too-many-return-statements
    '''On kernels older than 6.1, the src_addr parameter is not available
    from the sysfs. Therefore, we need to infer a match based on other
    parameters. And there are a few cases where we're simply not sure
    whether an existing connection (cid) matches the candidate
    connection (tid).
    @param cid: The Connection ID (dict) of an existing controller. Only the
                'host-iface' and 'host-traddr' entries are read here.
    @param tid: The Transport ID of a candidate controller. Only the
                host_iface and host_traddr attributes are read here.
    @return: True if the existing connection is considered a match for the
             candidate controller, False otherwise.
    '''
    host_iface = cid['host-iface']
    host_traddr = iputil.get_ipaddress_obj(cid['host-traddr'], ipv4_mapped_convert=True)

    if not host_iface:  # cid.host_iface is undefined
        if not host_traddr:  # cid.host_traddr is undefined
            # When the existing cid.src_addr, cid.host_traddr, and cid.host_iface
            # are all undefined (which can only happen on kernels prior to 6.1),
            # we can't know for sure on which interface an existing connection
            # was made. We assume a match and log that this could be wrong.
            # NOTE(review): an earlier version of this comment said a match
            # should only be declared when tid.host_iface and tid.host_traddr
            # are both undefined as well. The code has never enforced that.
            # Confirm which behavior is intended.
            logging.debug(
                'Udev._cid_matches_tid_legacy() - cid=%s, tid=%s - Not enough info. Assume "match" but this could be wrong.',
                cid,
                tid,
            )
            return True

        # cid.host_traddr is defined. If tid.host_traddr is defined,
        # then it must match the existing cid.host_traddr.
        if tid.host_traddr and iputil.get_ipaddress_obj(tid.host_traddr) != host_traddr:
            return False

        # If tid.host_iface is defined, then the interface where
        # the connection is located must match. If tid.host_iface
        # is not defined, then we don't really care on which
        # interface the connection was made and we can skip this test.
        if tid.host_iface:
            # With the existing cid.host_traddr, we can find the
            # interface of the existing connection.
            connection_iface = iputil.get_interface(str(host_traddr))
            if tid.host_iface != connection_iface:
                return False

        return True

    # cid.host_iface is defined
    if not host_traddr:  # cid.host_traddr is undefined
        if tid.host_iface and tid.host_iface != host_iface:
            return False

        if not tid.host_traddr:
            return True

        # It's impossible to tell the existing connection source
        # address. So, we can't tell if it matches tid.host_traddr.
        # However, if the existing host_iface has only one source
        # address assigned to it, we can assume that the source
        # address used for the existing connection is that address.
        tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
        if tid_host_traddr is None:
            # The candidate's host_traddr is not a valid IP address. It
            # cannot be compared to the interface's addresses.
            return False

        # The interface's addresses are ipaddress objects. Compare them to
        # the parsed candidate address (not to the raw string), using the
        # list that corresponds to the candidate address' own family.
        if_addrs = iputil.net_if_addrs().get(host_iface, {4: [], 6: []})
        source_addrs = if_addrs.get(tid_host_traddr.version, [])
        return len(source_addrs) == 1 and source_addrs[0] == tid_host_traddr

    # cid.host_traddr is defined
    if tid.host_iface and tid.host_iface != host_iface:
        return False

    if not tid.host_traddr:
        # If candidate's tid.host_traddr is undefined, then
        # the primary (default) source address will be used
        # for the candidate controller connection. We need to
        # make sure that the primary source address matches with
        # the existing connection's source address.

        # With the interface we can find the primary source
        # address. If existing cid.host_traddr is one of the
        # two primary addresses (i.e. primary IPv4 and primary
        # IPv6), then we have a match.
        return host_traddr in iputil.get_primary_src_addrs(host_iface)

    return iputil.get_ipaddress_obj(tid.host_traddr) == host_traddr

@staticmethod
def _cid_matches_tid(cid, tid):
    '''Tell whether an existing connection can be re-used for a candidate controller.
    @param cid: The Connection ID of an existing controller (from the sysfs).
    @param tid: The Transport ID of a candidate controller.
    @return: True when the connection identified by cid can serve the
             candidate identified by tid, False otherwise.

    The transport, traddr, trsvcid, and subsysnqn of the candidate must be
    identical to those of the existing connection. These 4 parameters are
    mandatory for a match.

    The candidate's tid.host_traddr and tid.host_iface, when defined, are
    checked against the existing connection's actual source address
    (cid.src_addr), which also identifies the interface of the connection.
    The cid.src_addr can only be read from the sysfs starting with kernel
    6.1. Without it, the decision is delegated to _cid_matches_tid_legacy().
    '''
    mandatory = ('transport', 'traddr', 'trsvcid', 'subsysnqn')
    if any(cid[param] != getattr(tid, param) for param in mandatory):
        return False

    src_addr = iputil.get_ipaddress_obj(cid['src-addr'], ipv4_mapped_convert=True)
    if not src_addr:
        # Legacy kernels (i.e. older than 6.1) never expose cid.src_addr.
        # The match must be inferred from the other parameters.
        return Udev._cid_matches_tid_legacy(cid, tid)

    # From here on the existing connection's source address is known. It
    # must equal the candidate's host_traddr (when the candidate has one).
    traddr_ok = not tid.host_traddr or src_addr == iputil.get_ipaddress_obj(tid.host_traddr)
    if not traddr_ok:
        return False

    # The source address also tells us on which interface the connection
    # was made. That interface must be the candidate's host_iface (when
    # the candidate has one).
    return not tid.host_iface or tid.host_iface == iputil.get_interface(str(src_addr))

def find_nvme_dc_device(self, tid):
'''@brief Find the nvme device associated with the specified
Discovery Controller.
Expand All @@ -164,7 +294,8 @@ def find_nvme_dc_device(self, tid):
if not self.is_dc_device(device):
continue

if self.get_tid(device) != tid:
cid = self.get_cid(device)
if not self._cid_matches_tid(cid, tid):
continue

return device
Expand All @@ -182,7 +313,8 @@ def find_nvme_ioc_device(self, tid):
if not self.is_ioc_device(device):
continue

if self.get_tid(device) != tid:
cid = self.get_cid(device)
if not self._cid_matches_tid(cid, tid):
continue

return device
Expand Down Expand Up @@ -300,28 +432,31 @@ def get_key_from_attr(device, attr, key, delim=','):
return attr_str[start:end]

@staticmethod
def get_tid(device):
    '''@brief return the Transport ID associated with a udev device
    @param device: The udev device to build the Transport ID from
    @return: A trid.TID object built from the device's Connection ID
    '''
    cid = Udev.get_cid(device)
    src_addr = cid['src-addr']
    if not cid['host-iface'] and src_addr:
        # We'll try to find the interface from the source address on
        # the connection. Only available if kernel exposes the source
        # address (src_addr) in the "address" attribute.
        cid['host-iface'] = iputil.get_interface(src_addr)

    return trid.TID(cid)

@staticmethod
def get_cid(device):
    '''@brief return the Connection ID associated with a udev device
    @param device: The udev device to read the connection parameters from
    @return: dict with the keys 'transport', 'traddr', 'trsvcid',
             'host-traddr', 'host-iface', 'subsysnqn', and 'src-addr'.
    '''
    return {
        'transport': Udev._get_property(device, 'NVME_TRTYPE'),
        'traddr': Udev._get_property(device, 'NVME_TRADDR'),
        'trsvcid': Udev._get_property(device, 'NVME_TRSVCID'),
        'host-traddr': Udev._get_property(device, 'NVME_HOST_TRADDR'),
        'host-iface': Udev._get_property(device, 'NVME_HOST_IFACE'),
        'subsysnqn': Udev._get_attribute(device, 'subsysnqn'),
        # Source address of the connection, taken from the "src_addr=" key
        # of the device's "address" attribute.
        'src-addr': Udev.get_key_from_attr(device, 'address', 'src_addr='),
    }


UDEV = Udev() # Singleton
Expand Down
Loading

0 comments on commit f37795d

Please sign in to comment.