Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

udev: improve comparison between candidate and existing controllers #365

Merged
merged 1 commit into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion staslib/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,7 @@ def hostkey(self):
try:
value = self.__get_value('Host', 'key', defs.NVME_HOSTKEY)
except FileNotFoundError as ex:
logging.info('Host key undefined: %s', ex)
logging.debug('Host key undefined: %s', ex)
value = None

return value
Expand Down
164 changes: 162 additions & 2 deletions staslib/iputil.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
# Size in bytes of the rtattr header that is unpacked with '<HH' (rta_len, rta_type).
RTATTR_SZ = 4
# Round an attribute length up to the next multiple of 4.
RTA_ALIGN = lambda length: ((length + 3) & ~3) # pylint: disable=unnecessary-lambda-assignment
# Attribute type values compared against rta_type while walking the
# attributes of RTM_NEWADDR messages.
# NOTE(review): for address messages the kernel headers name these values
# IFA_ADDRESS (1) and IFA_LABEL (3); the IFLA_* spelling belongs to link
# messages. The numeric values coincide - confirm the naming is intentional.
IFLA_ADDRESS = 1
IFLA_IFNAME = 3


def _nlmsghdr(nlmsg_type, nlmsg_flags, nlmsg_seq, nlmsg_pid, msg_len: int):
Expand Down Expand Up @@ -193,17 +194,176 @@ def _iface_of(src_addr): # pylint: disable=too-many-locals


# ******************************************************************************
def get_ipaddress_obj(ipaddr, ipv4_mapped_convert=False):
    '''@brief Return a IPv4Address or IPv6Address depending on whether @ipaddr
    is a valid IPv4 or IPv6 address. Return None otherwise.

    If ipv4_mapped_convert is set to True, IPv6 addresses that are IPv4-Mapped,
    will be converted to their IPv4 equivalent.

    @param ipaddr: Anything accepted by ipaddress.ip_address() (e.g. a string
                   or packed bytes).
    @param ipv4_mapped_convert: When True, an IPv4-Mapped IPv6 address such as
                   '::ffff:10.0.0.1' is returned as IPv4Address('10.0.0.1').
    @return: IPv4Address, IPv6Address, or None when @ipaddr is not a valid
             IP address.
    '''
    try:
        ip = ipaddress.ip_address(ipaddr)
    except ValueError:
        return None

    if ipv4_mapped_convert:
        # Only IPv6Address objects have the 'ipv4_mapped' attribute, and it is
        # None unless the address is IPv4-Mapped.
        ipv4_mapped = getattr(ip, 'ipv4_mapped', None)
        if ipv4_mapped is not None:
            ip = ipv4_mapped

    return ip


# ******************************************************************************
def get_primary_src_addrs(iface: str):  # pylint: disable=too-many-locals, too-many-branches
    '''@brief Return the two primary IP addresses associated with interface @iface.
    The primary address of a family is the first address of that family
    reported for the interface in the reply to GETADDRCMD.
    @param iface: The interface name to match
    @return: tuple(primary-ipv4-addr-or-None, primary-ipv6-addr-or-None)
    '''
    matched_index = None
    addrs_by_index = {}

    def _both_primaries(index):
        '''Return (ipv4, ipv6) when both families are already known for
        interface @index. Return None otherwise.'''
        entry = addrs_by_index[index]
        v4_addrs = entry.get(socket.AF_INET, [])
        v6_addrs = entry.get(socket.AF_INET6, [])
        if v4_addrs and v6_addrs:
            return (v4_addrs[0], v6_addrs[0])
        return None

    with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW) as sock:
        sock.sendall(GETADDRCMD)
        reply = sock.recv(8192)
        offset = 0
        while True:  # pylint: disable=too-many-nested-blocks
            if offset >= len(reply):
                # Everything received so far was consumed: read more.
                reply += sock.recv(8192)

            header = reply[offset : offset + NLMSG_HDRLEN]
            msg_len, msg_type, _, _, _ = struct.unpack('<LHHLL', header)

            if msg_type == NLMSG_DONE:
                break

            if msg_type == RTM_NEWADDR:
                body_start = offset + NLMSG_HDRLEN
                ifaddrmsg = reply[body_start : body_start + IFADDRMSG_SZ]
                ifa_family, _, _, _, ifa_index = struct.unpack('<BBBBL', ifaddrmsg)

                addrs_by_index.setdefault(ifa_index, {})

                attr_pos = body_start + IFADDRMSG_SZ
                msg_end = offset + msg_len
                while attr_pos < msg_end:
                    attr_hdr = reply[attr_pos : attr_pos + RTATTR_SZ]
                    rta_len, rta_type = struct.unpack('<HH', attr_hdr)
                    payload = reply[attr_pos + RTATTR_SZ : attr_pos + rta_len]

                    if rta_type == IFLA_IFNAME:
                        name = payload.rstrip(b'\0').decode()
                        addrs_by_index[ifa_index]['name'] = name
                        if name == iface:
                            matched_index = ifa_index
                            # Stop early if both families were already collected
                            pair = _both_primaries(ifa_index)
                            if pair is not None:
                                return pair

                    elif rta_type == IFLA_ADDRESS:
                        ip = get_ipaddress_obj(payload)
                        if ip:
                            addrs_by_index[ifa_index].setdefault(ifa_family, []).append(ip)
                            if addrs_by_index[ifa_index].get('name') == iface:
                                pair = _both_primaries(ifa_index)
                                if pair is not None:
                                    return pair

                    attr_pos += RTA_ALIGN(rta_len)  # Next rtattr (4-byte aligned)

            offset += msg_len  # Move to next Netlink message

    if matched_index is None:
        return None, None

    # Only one family (or none) was found for the interface: report None
    # for whichever family is missing.
    found = addrs_by_index[matched_index]
    v4_addrs = found.get(socket.AF_INET, [None])
    v6_addrs = found.get(socket.AF_INET6, [None])
    return (v4_addrs[0], v6_addrs[0])


# ******************************************************************************
def net_if_addrs():  # pylint: disable=too-many-locals
    '''@brief Return a dictionary listing every IP address for each interface.
    The first IP address of a list is the primary address used as the default
    source address.

    Addresses are stored as the objects returned by get_ipaddress_obj()
    (i.e. IPv4Address/IPv6Address). They are shown below in their string
    form for readability.

    @example: {
        'wlp0s20f3': {
            4: [IPv4Address('10.0.0.28')],
            6: [
                IPv6Address('fd5e:9a9e:c5bd:0:5509:890c:1848:3843'),
                IPv6Address('fd5e:9a9e:c5bd:0:1fd5:e527:8df7:7912'),
                IPv6Address('2605:59c8:6128:fb00:c083:1b8:c467:81d2'),
                IPv6Address('2605:59c8:6128:fb00:e99d:1a02:38e0:ad52'),
                IPv6Address('fe80::d71b:e807:d5ee:7614')
            ],
        },
        'lo': {
            4: [IPv4Address('127.0.0.1')],
            6: [IPv6Address('::1')],
        },
        'docker0': {
            4: [IPv4Address('172.17.0.1')],
            6: []
        },
    }

    @return: dict keyed by interface name. Interfaces for which no name
             attribute was seen are left out of the result.
    '''
    interfaces = {}
    with socket.socket(socket.AF_NETLINK, socket.SOCK_RAW) as sock:
        sock.sendall(GETADDRCMD)
        nlmsg = sock.recv(8192)
        nlmsg_idx = 0
        while True:  # pylint: disable=too-many-nested-blocks
            if nlmsg_idx >= len(nlmsg):
                nlmsg += sock.recv(8192)

            nlmsghdr = nlmsg[nlmsg_idx : nlmsg_idx + NLMSG_HDRLEN]
            nlmsg_len, nlmsg_type, _, _, _ = struct.unpack('<LHHLL', nlmsghdr)

            if nlmsg_type == NLMSG_DONE:
                break

            if nlmsg_type == RTM_NEWADDR:
                msg_indx = nlmsg_idx + NLMSG_HDRLEN
                msg = nlmsg[msg_indx : msg_indx + IFADDRMSG_SZ]  # ifaddrmsg
                ifa_family, _, _, _, ifa_index = struct.unpack('<BBBBL', msg)

                # Only IPv4/IPv6 messages get an entry. The attributes of any
                # other family are skipped so that the entry is never indexed
                # before it exists.
                if ifa_family in (socket.AF_INET, socket.AF_INET6):
                    family = 4 if ifa_family == socket.AF_INET else 6
                    entry = interfaces.setdefault(ifa_index, {4: [], 6: []})

                    rtattr_indx = msg_indx + IFADDRMSG_SZ
                    while rtattr_indx < (nlmsg_idx + nlmsg_len):
                        rtattr = nlmsg[rtattr_indx : rtattr_indx + RTATTR_SZ]
                        rta_len, rta_type = struct.unpack('<HH', rtattr)

                        if rta_type == IFLA_IFNAME:
                            data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
                            entry['name'] = data.rstrip(b'\0').decode()

                        elif rta_type == IFLA_ADDRESS:
                            data = nlmsg[rtattr_indx + RTATTR_SZ : rtattr_indx + rta_len]
                            ip = get_ipaddress_obj(data)
                            if ip:
                                entry[family].append(ip)

                        rta_len = RTA_ALIGN(rta_len)  # Round up to multiple of 4
                        rtattr_indx += rta_len  # Move to next rtattr

            nlmsg_idx += nlmsg_len  # Move to next Netlink message

    # Re-key the result by interface name. The 'name' item is removed from
    # each entry so that only the 4 and 6 address lists remain.
    if_addrs = {}
    for value in interfaces.values():
        name = value.pop('name', None)
        if name is not None:
            if_addrs[name] = value

    return if_addrs


# ******************************************************************************
def get_interface(src_addr):
'''Get interface for given source address
Expand Down
159 changes: 147 additions & 12 deletions staslib/udev.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,136 @@ def is_ioc_device(device):

return False

@staticmethod
def _cid_matches_tid_legacy(cid, tid):  # pylint: disable=too-many-return-statements
    '''On kernels older than 6.1, the src_addr parameter is not available
    from the sysfs. Therefore, we need to infer a match based on other
    parameters. And there are a few cases where we're simply not sure
    whether an existing connection (cid) matches the candidate
    connection (tid).

    @param cid: The Connection ID (dict) of an existing controller. Only
                the 'host-iface' and 'host-traddr' items are read here.
    @param tid: The Transport ID of a candidate controller. Only its
                host_iface and host_traddr attributes are read here.
    @return: True when the existing connection is considered a match for
             the candidate, False otherwise.
    '''
    host_iface = cid['host-iface']
    host_traddr = iputil.get_ipaddress_obj(cid['host-traddr'], ipv4_mapped_convert=True)
    # Normalize the candidate's address the same way as the existing one so
    # that both sides are compared as address objects. This is None when
    # tid.host_traddr is undefined or is not a valid IP address.
    tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)

    if not host_iface:  # cid.host_iface is undefined
        if not host_traddr:  # cid.host_traddr is undefined
            # When the existing cid.src_addr, cid.host_traddr, and cid.host_iface
            # are all undefined (which can only happen on kernels prior to 6.1),
            # we can't know for sure on which interface an existing connection
            # was made. A match is assumed and the uncertainty is logged.
            logging.debug(
                'Udev._cid_matches_tid_legacy() - cid=%s, tid=%s - Not enough info. Assume "match" but this could be wrong.',
                cid,
                tid,
            )
            return True

        # cid.host_traddr is defined. If tid.host_traddr is defined,
        # then it must match the existing cid.host_traddr.
        if tid.host_traddr and tid_host_traddr != host_traddr:
            return False

        # If tid.host_iface is defined, then the interface where
        # the connection is located must match. If tid.host_iface
        # is not defined, then we don't really care on which
        # interface the connection was made and we can skip this test.
        if tid.host_iface:
            # With the existing cid.host_traddr, we can find the
            # interface of the existing connection.
            connection_iface = iputil.get_interface(str(host_traddr))
            if tid.host_iface != connection_iface:
                return False

        return True

    # cid.host_iface is defined. If the candidate specifies an interface,
    # it must be the same one.
    if tid.host_iface and tid.host_iface != host_iface:
        return False

    if not host_traddr:  # cid.host_traddr is undefined
        if not tid.host_traddr:
            return True

        if tid_host_traddr is None:
            # tid.host_traddr is set but is not a valid IP address. It
            # cannot be compared to any interface address.
            return False

        # It's impossible to tell the existing connection source
        # address. So, we can't tell if it matches tid.host_traddr.
        # However, if the existing host_iface has only one source
        # address assigned to it (for the address family of
        # tid.host_traddr), we can assume that the source address
        # used for the existing connection is that address.
        if_addrs = iputil.net_if_addrs().get(host_iface, {4: [], 6: []})
        source_addrs = if_addrs[tid_host_traddr.version]
        if len(source_addrs) == 1:
            only_addr = iputil.get_ipaddress_obj(source_addrs[0], ipv4_mapped_convert=True)
            return only_addr == tid_host_traddr

        return False

    # cid.host_traddr is defined
    if not tid.host_traddr:
        # If candidate's tid.host_traddr is undefined, then
        # the primary (default) source address will be used
        # for the candidate controller connection. We need to
        # make sure that the primary source address matches with
        # the existing connection's source address.

        # With the interface we can find the primary source
        # address. If existing cid.host_traddr is one of the
        # two primary addresses (i.e. primary IPv4 and primary
        # IPv6), then we have a match.
        return host_traddr in iputil.get_primary_src_addrs(host_iface)

    return tid_host_traddr == host_traddr

@staticmethod
def _cid_matches_tid(cid, tid):
    '''Check if existing controller's cid matches candidate controller's tid.
    @param cid: The Connection ID of an existing controller (from the sysfs).
    @param tid: The Transport ID of a candidate controller.
    @return: True when the existing connection can be used for the
             candidate controller, False otherwise.

    We're trying to find if an existing connection (specified by cid) can
    be re-used for the candidate controller (specified by tid).

    We do not have a match if the candidate's tid.transport, tid.traddr,
    tid.trsvcid, and tid.subsysnqn are not identical to those of the cid.
    These 4 parameters are mandatory for a match.

    With regards to the candidate's tid.host_traddr and tid.host_iface, if
    those are defined but do not match the existing cid.host_traddr and
    cid.host_iface, we may still be able to find a match by taking the
    existing cid.src_addr into consideration since that parameter identifies
    the actual source address of the connection and therefore can be used
    to infer the interface of the connection. However, the cid.src_addr can
    only be read from the sysfs starting with kernel 6.1.
    '''
    mandatory_match = (
        cid['transport'] == tid.transport
        and cid['traddr'] == tid.traddr
        and cid['trsvcid'] == tid.trsvcid
        and cid['subsysnqn'] == tid.subsysnqn
    )
    if not mandatory_match:
        return False

    src_addr = iputil.get_ipaddress_obj(cid['src-addr'], ipv4_mapped_convert=True)
    if not src_addr:
        # For legacy kernels (i.e. older than 6.1), the existing cid.src_addr
        # is always undefined. We need to use advanced logic to determine
        # whether cid and tid match.
        return Udev._cid_matches_tid_legacy(cid, tid)

    # The existing controller's cid.src_addr is always defined for kernel
    # 6.1 and later. We can use the existing controller's cid.src_addr to
    # find the interface on which the connection was made and therefore
    # match it to the candidate's tid.host_iface. And the cid.src_addr
    # can also be used to match the candidate's tid.host_traddr.
    if tid.host_traddr:
        # Apply the same IPv4-Mapped conversion that was applied to
        # src_addr so that both sides are compared in the same form.
        tid_host_traddr = iputil.get_ipaddress_obj(tid.host_traddr, ipv4_mapped_convert=True)
        if src_addr != tid_host_traddr:
            return False

    if tid.host_iface and tid.host_iface != iputil.get_interface(str(src_addr)):
        return False

    return True

def find_nvme_dc_device(self, tid):
'''@brief Find the nvme device associated with the specified
Discovery Controller.
Expand All @@ -164,7 +294,8 @@ def find_nvme_dc_device(self, tid):
if not self.is_dc_device(device):
continue

if self.get_tid(device) != tid:
cid = self.get_cid(device)
if not self._cid_matches_tid(cid, tid):
continue

return device
Expand All @@ -182,7 +313,8 @@ def find_nvme_ioc_device(self, tid):
if not self.is_ioc_device(device):
continue

if self.get_tid(device) != tid:
cid = self.get_cid(device)
if not self._cid_matches_tid(cid, tid):
continue

return device
Expand Down Expand Up @@ -300,28 +432,31 @@ def get_key_from_attr(device, attr, key, delim=','):
return attr_str[start:end]

@staticmethod
def get_tid(device):
    '''@brief return the Transport ID associated with a udev device
    @param device: The udev device to read the connection parameters from.
    @return: A trid.TID built from the device's Connection ID.
    '''
    cid = Udev.get_cid(device)
    src_addr = cid['src-addr']
    if not cid['host-iface'] and src_addr:
        # We'll try to find the interface from the source address on
        # the connection. Only available if kernel exposes the source
        # address (src_addr) in the "address" attribute.
        cid['host-iface'] = iputil.get_interface(src_addr)

    return trid.TID(cid)

@staticmethod
def get_cid(device):
    '''@brief return the Connection ID associated with a udev device
    @param device: The udev device to read the connection parameters from.
    @return: dict with the keys 'transport', 'traddr', 'trsvcid',
             'host-traddr', 'host-iface', 'subsysnqn' and 'src-addr'.
    '''
    cid = {
        'transport': Udev._get_property(device, 'NVME_TRTYPE'),
        'traddr': Udev._get_property(device, 'NVME_TRADDR'),
        'trsvcid': Udev._get_property(device, 'NVME_TRSVCID'),
        'host-traddr': Udev._get_property(device, 'NVME_HOST_TRADDR'),
        'host-iface': Udev._get_property(device, 'NVME_HOST_IFACE'),
        'subsysnqn': Udev._get_attribute(device, 'subsysnqn'),
        # Source address of the connection, taken from the "src_addr=" key
        # of the device's "address" attribute.
        'src-addr': Udev.get_key_from_attr(device, 'address', 'src_addr='),
    }
    return cid


# Module-level Udev instance, created once when this module is imported.
UDEV = Udev() # Singleton
Expand Down
Loading