diff --git a/CHANGES/9204.misc.rst b/CHANGES/9204.misc.rst new file mode 100644 index 00000000000..da12a7df6f7 --- /dev/null +++ b/CHANGES/9204.misc.rst @@ -0,0 +1 @@ +Significantly speed up filtering cookies -- by :user:`bdraco`. diff --git a/aiohttp/cookiejar.py b/aiohttp/cookiejar.py index 9b7f38d6685..85fd7716b56 100644 --- a/aiohttp/cookiejar.py +++ b/aiohttp/cookiejar.py @@ -94,6 +94,9 @@ def __init__( self._cookies: DefaultDict[Tuple[str, str], SimpleCookie] = defaultdict( SimpleCookie ) + self._morsel_cache: DefaultDict[Tuple[str, str], Dict[str, Morsel[str]]] = ( + defaultdict(dict) + ) self._host_only_cookies: Set[Tuple[str, str]] = set() self._unsafe = unsafe self._quote_cookie = quote_cookie @@ -129,6 +132,7 @@ def clear(self, predicate: Optional[ClearCookiePredicate] = None) -> None: if predicate is None: self._expire_heap.clear() self._cookies.clear() + self._morsel_cache.clear() self._host_only_cookies.clear() self._expirations.clear() return @@ -210,6 +214,7 @@ def _delete_cookies(self, to_del: List[Tuple[str, str, str]]) -> None: for domain, path, name in to_del: self._host_only_cookies.discard((domain, name)) self._cookies[(domain, path)].pop(name, None) + self._morsel_cache[(domain, path)].pop(name, None) self._expirations.pop((domain, path, name), None) def _expire_cookie(self, when: float, domain: str, path: str, name: str) -> None: @@ -285,7 +290,12 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No else: cookie["expires"] = "" - self._cookies[(domain, path)][name] = cookie + key = (domain, path) + if self._cookies[key].get(name) != cookie: + # Don't blow away the cache if the same + # cookie gets set again + self._cookies[key][name] = cookie + self._morsel_cache[key].pop(name, None) self._do_expiration() @@ -337,30 +347,33 @@ def filter_cookies(self, request_url: URL) -> "BaseCookie[str]": # Create every combination of (domain, path) pairs. pairs = itertools.product(domains, paths) - # Point 2: https://www.rfc-editor.org/rfc/rfc6265.html#section-5.4 - cookies = itertools.chain.from_iterable( - self._cookies[p].values() for p in pairs - ) path_len = len(request_url.path) - for cookie in cookies: - name = cookie.key - domain = cookie["domain"] + # Point 2: https://www.rfc-editor.org/rfc/rfc6265.html#section-5.4 + for p in pairs: + for name, cookie in self._cookies[p].items(): + domain = cookie["domain"] - if (domain, name) in self._host_only_cookies and domain != hostname: - continue + if (domain, name) in self._host_only_cookies and domain != hostname: + continue - # Skip edge case when the cookie has a trailing slash but request doesn't. - if len(cookie["path"]) > path_len: - continue + # Skip edge case when the cookie has a trailing slash but request doesn't. + if len(cookie["path"]) > path_len: + continue - if is_not_secure and cookie["secure"]: - continue + if is_not_secure and cookie["secure"]: + continue + + # We already built the Morsel so reuse it here + if name in self._morsel_cache[p]: + filtered[name] = self._morsel_cache[p][name] + continue - # It's critical we use the Morsel so the coded_value - # (based on cookie version) is preserved - mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) - mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) - filtered[name] = mrsl_val + # It's critical we use the Morsel so the coded_value + # (based on cookie version) is preserved + mrsl_val = cast("Morsel[str]", cookie.get(cookie.key, Morsel())) + mrsl_val.set(cookie.key, cookie.value, cookie.coded_value) + self._morsel_cache[p][name] = mrsl_val + filtered[name] = mrsl_val return filtered