From 9f5397d603a21f484587bf6748a7336fd5005b06 Mon Sep 17 00:00:00 2001 From: Fantix King Date: Sun, 11 Sep 2022 10:39:22 -0400 Subject: [PATCH 1/3] Consistent loop.getaddrinfo() API * ai_canonname is different across libc impls (#494) * AddressFamily and SocketKind can be enums * Also fixed failing test --- tests/test_dns.py | 4 ++++ tests/test_tcp.py | 2 +- uvloop/dns.pyx | 40 +++++++++++++++++++++++++++++++++++--- uvloop/includes/stdlib.pxi | 1 + uvloop/loop.pyx | 24 ++++++++++++++++++++--- 5 files changed, 64 insertions(+), 7 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index d4e14244..7b6d5911 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -130,10 +130,14 @@ def test_getaddrinfo_18(self): def test_getaddrinfo_19(self): self._test_getaddrinfo('::1', 80) self._test_getaddrinfo('::1', 80, type=socket.SOCK_STREAM) + self._test_getaddrinfo('::1', 80, type=socket.SOCK_STREAM, + flags=socket.AI_CANONNAME) def test_getaddrinfo_20(self): self._test_getaddrinfo('127.0.0.1', 80) self._test_getaddrinfo('127.0.0.1', 80, type=socket.SOCK_STREAM) + self._test_getaddrinfo('127.0.0.1', 80, type=socket.SOCK_STREAM, + flags=socket.AI_CANONNAME) ###### diff --git a/tests/test_tcp.py b/tests/test_tcp.py index a72b42be..cc03a976 100644 --- a/tests/test_tcp.py +++ b/tests/test_tcp.py @@ -222,7 +222,7 @@ def test_create_server_4(self): with self.assertRaisesRegex(OSError, r"error while attempting.*\('127.*: " - r"address already in use"): + r"address( already)? in use"): self.loop.run_until_complete( self.loop.create_server(object, *addr)) diff --git a/uvloop/dns.pyx b/uvloop/dns.pyx index a252a242..ffb251d8 100644 --- a/uvloop/dns.pyx +++ b/uvloop/dns.pyx @@ -224,6 +224,17 @@ cdef __static_getaddrinfo(object host, object port, return (family, type, proto) +# This bitmask is used in __static_getaddrinfo_pyaddr() to manage +# if ai_canonname should be set if AI_CANONNAME flag is set. +# This bitmask is lazily set in loop.getaddrinfo() to make sure that +# __static_getaddrinfo_pyaddr() behaves consistently as libc getaddrinfo(). +# 1 << 0 : If 1 << 1 is set +# 1 << 1 : If ai_canonname should be set if AI_CANONNAME is set +# 1 << 2 : If 1 << 3 is set +# 1 << 3 : If ai_canonname should be set if AI_CANONNAME is not set +cdef int __static_getaddrinfo_canonname_mode = 0 + + cdef __static_getaddrinfo_pyaddr(object host, object port, int family, int type, int proto, int flags): @@ -245,7 +256,23 @@ cdef __static_getaddrinfo_pyaddr(object host, object port, except Exception: return - return af, type, proto, '', pyaddr + if __static_getaddrinfo_canonname_mode & ( + 1 << 1 if flags & socket_AI_CANONNAME else 1 << 3 + ): + if isinstance(host, str): + canon_name = host + else: + canon_name = host.decode('ascii') + else: + canon_name = '' + + return ( + _intenum_converter(af, socket_AddressFamily), + _intenum_converter(type, socket_SocketKind), + proto, + canon_name, + pyaddr, + ) @cython.freelist(DEFAULT_FREELIST_SIZE) @@ -276,8 +303,8 @@ cdef class AddrInfo: while ptr != NULL: if ptr.ai_addr.sa_family in (uv.AF_INET, uv.AF_INET6): result.append(( - ptr.ai_family, - ptr.ai_socktype, + _intenum_converter(ptr.ai_family, socket_AddressFamily), + _intenum_converter(ptr.ai_socktype, socket_SocketKind), ptr.ai_protocol, ('' if ptr.ai_canonname is NULL else (ptr.ai_canonname).decode()), @@ -370,6 +397,13 @@ cdef class NameInfoRequest(UVRequest): self.callback(convert_error(err)) +cdef _intenum_converter(value, enum_klass): + try: + return enum_klass(value) + except ValueError: + return value + + cdef void __on_addrinfo_resolved(uv.uv_getaddrinfo_t *resolver, int status, system.addrinfo *res) with gil: diff --git a/uvloop/includes/stdlib.pxi b/uvloop/includes/stdlib.pxi index a5ef7954..e7957fe2 100644 --- a/uvloop/includes/stdlib.pxi +++ b/uvloop/includes/stdlib.pxi @@ -72,6 +72,7 @@ cdef int has_SO_REUSEPORT = hasattr(socket, 'SO_REUSEPORT') cdef int SO_REUSEPORT = getattr(socket, 'SO_REUSEPORT', 0) cdef int SO_BROADCAST = getattr(socket, 'SO_BROADCAST') cdef int SOCK_NONBLOCK = getattr(socket, 'SOCK_NONBLOCK', -1) +cdef int socket_AI_CANONNAME = getattr(socket, 'AI_CANONNAME') cdef socket_gaierror = socket.gaierror cdef socket_error = socket.error diff --git a/uvloop/loop.pyx b/uvloop/loop.pyx index edb58d97..a07f926d 100644 --- a/uvloop/loop.pyx +++ b/uvloop/loop.pyx @@ -1523,13 +1523,31 @@ cdef class Loop: @cython.iterable_coroutine async def getaddrinfo(self, object host, object port, *, int family=0, int type=0, int proto=0, int flags=0): + global __static_getaddrinfo_canonname_mode addr = __static_getaddrinfo_pyaddr(host, port, family, type, proto, flags) if addr is not None: - fut = self._new_future() - fut.set_result([addr]) - return await fut + if __static_getaddrinfo_canonname_mode & ( + 1 << 0 if flags & socket_AI_CANONNAME else 1 << 2 + ): + return [addr] + + rv = await self._getaddrinfo( + host, port, family, type, proto, flags, 1) + + if flags & socket_AI_CANONNAME: + if rv[0][3]: + __static_getaddrinfo_canonname_mode |= 1 << 0 | 1 << 1 + else: + __static_getaddrinfo_canonname_mode |= 1 << 0 + else: + if rv[0][3]: + __static_getaddrinfo_canonname_mode |= 1 << 2 | 1 << 3 + else: + __static_getaddrinfo_canonname_mode |= 1 << 2 + + return rv return await self._getaddrinfo( host, port, family, type, proto, flags, 1) From 0b27ec4ee601948f7288e1f6b804f40db1c38d23 Mon Sep 17 00:00:00 2001 From: Fantix King Date: Sun, 11 Sep 2022 15:51:04 -0400 Subject: [PATCH 2/3] CRF: improve readibility --- uvloop/dns.pyx | 28 ++++++++++++++++++++-------- uvloop/loop.pyx | 27 ++++++++++++++++++--------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/uvloop/dns.pyx b/uvloop/dns.pyx index ffb251d8..6c1d9744 100644 --- a/uvloop/dns.pyx +++ b/uvloop/dns.pyx @@ -224,16 +224,27 @@ cdef __static_getaddrinfo(object host, object port, return (family, type, proto) -# This bitmask is used in __static_getaddrinfo_pyaddr() to manage -# if ai_canonname should be set if AI_CANONNAME flag is set. -# This bitmask is lazily set in loop.getaddrinfo() to make sure that +# This flag is used in __static_getaddrinfo_pyaddr() to manage +# if ai_canonname should be returned when AI_CANONNAME flag is set, +# because its behavior varies in different libc implementations (see #494). +# This flag is lazily set in loop.getaddrinfo() to make sure that # __static_getaddrinfo_pyaddr() behaves consistently as libc getaddrinfo(). -# 1 << 0 : If 1 << 1 is set -# 1 << 1 : If ai_canonname should be set if AI_CANONNAME is set -# 1 << 2 : If 1 << 3 is set -# 1 << 3 : If ai_canonname should be set if AI_CANONNAME is not set cdef int __static_getaddrinfo_canonname_mode = 0 +# Bitmasks for __static_getaddrinfo_canonname_mode: + +# If STATIC_GETADDRINFO_CANONNAME_ON_RETURN is set +cdef int STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET = 1 << 0 + +# If ai_canonname should be returned when AI_CANONNAME is set +cdef int STATIC_GETADDRINFO_CANONNAME_ON_RETURN = 1 << 1 + +# If STATIC_GETADDRINFO_CANONNAME_OFF_RETURN is set +cdef int STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET = 1 << 2 + +# If ai_canonname should be returned when AI_CANONNAME is not set +cdef int STATIC_GETADDRINFO_CANONNAME_OFF_RETURN = 1 << 3 + cdef __static_getaddrinfo_pyaddr(object host, object port, int family, int type, @@ -257,7 +268,8 @@ cdef __static_getaddrinfo_pyaddr(object host, object port, return if __static_getaddrinfo_canonname_mode & ( - 1 << 1 if flags & socket_AI_CANONNAME else 1 << 3 + STATIC_GETADDRINFO_CANONNAME_ON_RETURN if flags & socket_AI_CANONNAME + else STATIC_GETADDRINFO_CANONNAME_OFF_RETURN ): if isinstance(host, str): canon_name = host diff --git a/uvloop/loop.pyx b/uvloop/loop.pyx index a07f926d..981ed9ce 100644 --- a/uvloop/loop.pyx +++ b/uvloop/loop.pyx @@ -1529,23 +1529,32 @@ cdef class Loop: type, proto, flags) if addr is not None: if __static_getaddrinfo_canonname_mode & ( - 1 << 0 if flags & socket_AI_CANONNAME else 1 << 2 + STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET + if flags & socket_AI_CANONNAME + else STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET ): return [addr] rv = await self._getaddrinfo( host, port, family, type, proto, flags, 1) + _af, _type, _proto, canon_name, _addr = rv[0] if flags & socket_AI_CANONNAME: - if rv[0][3]: - __static_getaddrinfo_canonname_mode |= 1 << 0 | 1 << 1 - else: - __static_getaddrinfo_canonname_mode |= 1 << 0 + __static_getaddrinfo_canonname_mode |= ( + STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET + ) + if canon_name: + __static_getaddrinfo_canonname_mode |= ( + STATIC_GETADDRINFO_CANONNAME_ON_RETURN + ) else: - if rv[0][3]: - __static_getaddrinfo_canonname_mode |= 1 << 2 | 1 << 3 - else: - __static_getaddrinfo_canonname_mode |= 1 << 2 + __static_getaddrinfo_canonname_mode |= ( + STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET + ) + if canon_name: + __static_getaddrinfo_canonname_mode |= ( + STATIC_GETADDRINFO_CANONNAME_OFF_RETURN + ) return rv From 93f32d2ae5893a790e31c3abf09e619e60c275a4 Mon Sep 17 00:00:00 2001 From: Fantix King Date: Mon, 12 Sep 2022 15:26:43 -0400 Subject: [PATCH 3/3] CRF: ai_canonname always follows the flag AI_CANONNAME --- tests/test_dns.py | 66 ++++++++++++++++++++++++++++++++++++++++------- uvloop/dns.pyx | 27 +------------------ uvloop/loop.pyx | 31 +--------------------- 3 files changed, 58 insertions(+), 66 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index 7b6d5911..12408978 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -5,12 +5,32 @@ from uvloop import _testbase as tb +def patched_getaddrinfo(*args, **kwargs): + # corrected socket.getaddrinfo() behavior: ai_canonname always follows the + # flag AI_CANONNAME, even if `host` is an IP + rv = [] + result = socket.getaddrinfo(*args, **kwargs) + for af, sk, proto, canon_name, addr in result: + if kwargs.get('flags', 0) & socket.AI_CANONNAME: + if not canon_name: + canon_name = args[0] + if not isinstance(canon_name, str): + canon_name = canon_name.decode('ascii') + elif canon_name: + canon_name = '' + rv.append((af, sk, proto, canon_name, addr)) + return rv + + class BaseTestDNS: - def _test_getaddrinfo(self, *args, **kwargs): + def _test_getaddrinfo(self, *args, _patch=False, **kwargs): err = None try: - a1 = socket.getaddrinfo(*args, **kwargs) + if _patch: + a1 = patched_getaddrinfo(*args, **kwargs) + else: + a1 = socket.getaddrinfo(*args, **kwargs) except socket.gaierror as ex: err = ex @@ -100,20 +120,36 @@ def test_getaddrinfo_11(self): self._test_getaddrinfo(b'example.com', '80', type=socket.SOCK_STREAM) def test_getaddrinfo_12(self): + # musl always returns ai_canonname but we don't + patch = self.implementation != 'asyncio' + self._test_getaddrinfo('127.0.0.1', '80') - self._test_getaddrinfo('127.0.0.1', '80', type=socket.SOCK_STREAM) + self._test_getaddrinfo('127.0.0.1', '80', type=socket.SOCK_STREAM, + _patch=patch) def test_getaddrinfo_13(self): + # musl always returns ai_canonname but we don't + patch = self.implementation != 'asyncio' + self._test_getaddrinfo(b'127.0.0.1', b'80') - self._test_getaddrinfo(b'127.0.0.1', b'80', type=socket.SOCK_STREAM) + self._test_getaddrinfo(b'127.0.0.1', b'80', type=socket.SOCK_STREAM, + _patch=patch) def test_getaddrinfo_14(self): + # musl always returns ai_canonname but we don't + patch = self.implementation != 'asyncio' + self._test_getaddrinfo(b'127.0.0.1', b'http') - self._test_getaddrinfo(b'127.0.0.1', b'http', type=socket.SOCK_STREAM) + self._test_getaddrinfo(b'127.0.0.1', b'http', type=socket.SOCK_STREAM, + _patch=patch) def test_getaddrinfo_15(self): + # musl always returns ai_canonname but we don't + patch = self.implementation != 'asyncio' + self._test_getaddrinfo('127.0.0.1', 'http') - self._test_getaddrinfo('127.0.0.1', 'http', type=socket.SOCK_STREAM) + self._test_getaddrinfo('127.0.0.1', 'http', type=socket.SOCK_STREAM, + _patch=patch) def test_getaddrinfo_16(self): self._test_getaddrinfo('localhost', 'http') @@ -128,16 +164,26 @@ def test_getaddrinfo_18(self): self._test_getaddrinfo('localhost', b'http', type=socket.SOCK_STREAM) def test_getaddrinfo_19(self): + # musl always returns ai_canonname while macOS never return for IPs, + # but we strictly follow the docs to use the AI_CANONNAME flag + patch = self.implementation != 'asyncio' + self._test_getaddrinfo('::1', 80) - self._test_getaddrinfo('::1', 80, type=socket.SOCK_STREAM) self._test_getaddrinfo('::1', 80, type=socket.SOCK_STREAM, - flags=socket.AI_CANONNAME) + _patch=patch) + self._test_getaddrinfo('::1', 80, type=socket.SOCK_STREAM, + flags=socket.AI_CANONNAME, _patch=patch) def test_getaddrinfo_20(self): + # musl always returns ai_canonname while macOS never return for IPs, + # but we strictly follow the docs to use the AI_CANONNAME flag + patch = self.implementation != 'asyncio' + self._test_getaddrinfo('127.0.0.1', 80) - self._test_getaddrinfo('127.0.0.1', 80, type=socket.SOCK_STREAM) self._test_getaddrinfo('127.0.0.1', 80, type=socket.SOCK_STREAM, - flags=socket.AI_CANONNAME) + _patch=patch) + self._test_getaddrinfo('127.0.0.1', 80, type=socket.SOCK_STREAM, + flags=socket.AI_CANONNAME, _patch=patch) ###### diff --git a/uvloop/dns.pyx b/uvloop/dns.pyx index 6c1d9744..dbb4786e 100644 --- a/uvloop/dns.pyx +++ b/uvloop/dns.pyx @@ -224,28 +224,6 @@ cdef __static_getaddrinfo(object host, object port, return (family, type, proto) -# This flag is used in __static_getaddrinfo_pyaddr() to manage -# if ai_canonname should be returned when AI_CANONNAME flag is set, -# because its behavior varies in different libc implementations (see #494). -# This flag is lazily set in loop.getaddrinfo() to make sure that -# __static_getaddrinfo_pyaddr() behaves consistently as libc getaddrinfo(). -cdef int __static_getaddrinfo_canonname_mode = 0 - -# Bitmasks for __static_getaddrinfo_canonname_mode: - -# If STATIC_GETADDRINFO_CANONNAME_ON_RETURN is set -cdef int STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET = 1 << 0 - -# If ai_canonname should be returned when AI_CANONNAME is set -cdef int STATIC_GETADDRINFO_CANONNAME_ON_RETURN = 1 << 1 - -# If STATIC_GETADDRINFO_CANONNAME_OFF_RETURN is set -cdef int STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET = 1 << 2 - -# If ai_canonname should be returned when AI_CANONNAME is not set -cdef int STATIC_GETADDRINFO_CANONNAME_OFF_RETURN = 1 << 3 - - cdef __static_getaddrinfo_pyaddr(object host, object port, int family, int type, int proto, int flags): @@ -267,10 +245,7 @@ cdef __static_getaddrinfo_pyaddr(object host, object port, except Exception: return - if __static_getaddrinfo_canonname_mode & ( - STATIC_GETADDRINFO_CANONNAME_ON_RETURN if flags & socket_AI_CANONNAME - else STATIC_GETADDRINFO_CANONNAME_OFF_RETURN - ): + if flags & socket_AI_CANONNAME: if isinstance(host, str): canon_name = host else: diff --git a/uvloop/loop.pyx b/uvloop/loop.pyx index 981ed9ce..afaa94e3 100644 --- a/uvloop/loop.pyx +++ b/uvloop/loop.pyx @@ -1523,40 +1523,11 @@ cdef class Loop: @cython.iterable_coroutine async def getaddrinfo(self, object host, object port, *, int family=0, int type=0, int proto=0, int flags=0): - global __static_getaddrinfo_canonname_mode addr = __static_getaddrinfo_pyaddr(host, port, family, type, proto, flags) if addr is not None: - if __static_getaddrinfo_canonname_mode & ( - STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET - if flags & socket_AI_CANONNAME - else STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET - ): - return [addr] - - rv = await self._getaddrinfo( - host, port, family, type, proto, flags, 1) - - _af, _type, _proto, canon_name, _addr = rv[0] - if flags & socket_AI_CANONNAME: - __static_getaddrinfo_canonname_mode |= ( - STATIC_GETADDRINFO_CANONNAME_ON_BEHAVIOR_SET - ) - if canon_name: - __static_getaddrinfo_canonname_mode |= ( - STATIC_GETADDRINFO_CANONNAME_ON_RETURN - ) - else: - __static_getaddrinfo_canonname_mode |= ( - STATIC_GETADDRINFO_CANONNAME_OFF_BEHAVIOR_SET - ) - if canon_name: - __static_getaddrinfo_canonname_mode |= ( - STATIC_GETADDRINFO_CANONNAME_OFF_RETURN - ) - - return rv + return [addr] return await self._getaddrinfo( host, port, family, type, proto, flags, 1)