From 836aa01a9fe56b25c1d4f43882ead3377448e1d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Randy=20D=C3=B6ring?= <30527984+radoering@users.noreply.github.com> Date: Wed, 7 Sep 2022 23:11:13 +0200 Subject: [PATCH 1/4] performance(legacy_repository): introduce link cache to improve performance for legacy repositories Co-authored-by: Jarrod Moore --- src/poetry/repositories/link_sources/base.py | 39 ++++++++++--------- src/poetry/repositories/link_sources/html.py | 19 +++++++-- tests/repositories/link_sources/test_base.py | 41 ++++++++++++-------- tests/repositories/link_sources/test_html.py | 3 +- tests/repositories/test_legacy_repository.py | 17 +------- 5 files changed, 64 insertions(+), 55 deletions(-) diff --git a/src/poetry/repositories/link_sources/base.py b/src/poetry/repositories/link_sources/base.py index 6e07de3cfdd..0e5aa289800 100644 --- a/src/poetry/repositories/link_sources/base.py +++ b/src/poetry/repositories/link_sources/base.py @@ -1,10 +1,12 @@ from __future__ import annotations +import functools import logging import re -from abc import abstractmethod from typing import TYPE_CHECKING +from typing import DefaultDict +from typing import List from packaging.utils import canonicalize_name from poetry.core.packages.package import Package @@ -20,6 +22,8 @@ from packaging.utils import NormalizedName from poetry.core.packages.utils.link import Link + LinkCache = DefaultDict[NormalizedName, DefaultDict[Version, List[Link]]] + logger = logging.getLogger(__name__) @@ -39,21 +43,16 @@ class LinkSource: def __init__(self, url: str) -> None: self._url = url + self._get_link_cache_wrapper = functools.lru_cache(maxsize=1)( + self._get_link_cache + ) @property def url(self) -> str: return self._url - def versions(self, name: str) -> Iterator[Version]: - name = canonicalize_name(name) - seen: set[Version] = set() - - for link in self.links: - pkg = self.link_package_data(link) - - if pkg and pkg.name == name and pkg.version not in seen: - seen.add(pkg.version) - yield pkg.version + def versions(self, name: NormalizedName) -> Iterator[Version]: + yield from self._link_cache[name] @property def packages(self) -> Iterator[Package]: @@ -64,9 +63,10 @@ def packages(self) -> Iterator[Package]: yield pkg @property - @abstractmethod def links(self) -> Iterator[Link]: - raise NotImplementedError() + for links_per_version in self._link_cache.values(): + for links in links_per_version.values(): + yield from links @classmethod def link_package_data(cls, link: Link) -> Package | None: @@ -102,11 +102,7 @@ def link_package_data(cls, link: Link) -> Package | None: def links_for_version( self, name: NormalizedName, version: Version ) -> Iterator[Link]: - for link in self.links: - pkg = self.link_package_data(link) - - if pkg and pkg.name == name and pkg.version == version: - yield link + yield from self._link_cache[name][version] def clean_link(self, url: str) -> str: """Makes sure a link is fully encoded. That is, if a ' ' shows up in @@ -127,3 +123,10 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool: if reasons: return "\n".join(sorted(reasons)) return True + + @property + def _link_cache(self) -> LinkCache: + return self._get_link_cache_wrapper() + + def _get_link_cache(self) -> LinkCache: + raise NotImplementedError() diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index d72a8e06ded..aa1c700e879 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -3,6 +3,7 @@ import urllib.parse import warnings +from collections import defaultdict from html import unescape from typing import TYPE_CHECKING @@ -12,7 +13,11 @@ if TYPE_CHECKING: - from collections.abc import Iterator + from packaging.utils import NormalizedName + from poetry.core.semver.version import Version + + from poetry.repositories.link_sources.base import LinkCache + with warnings.catch_warnings(): warnings.simplefilter("ignore") @@ -25,8 +30,10 @@ def __init__(self, url: str, content: str) -> None: self._parsed = html5lib.parse(content, namespaceHTMLElements=False) - @property - def links(self) -> Iterator[Link]: + def _get_link_cache(self) -> LinkCache: + links: defaultdict[ + NormalizedName, defaultdict[Version, list[Link]] + ] = defaultdict(lambda: defaultdict(list)) for anchor in self._parsed.findall(".//a"): if anchor.get("href"): href = anchor.get("href") @@ -44,7 +51,11 @@ def links(self) -> Iterator[Link]: if link.ext not in self.SUPPORTED_FORMATS: continue - yield link + pkg = self.link_package_data(link) + if pkg: + links[pkg.name][pkg.version].append(link) + + return links class SimpleRepositoryPage(HTMLPage): diff --git a/tests/repositories/link_sources/test_base.py b/tests/repositories/link_sources/test_base.py index c949f90a5eb..394ccf7e6c8 100644 --- a/tests/repositories/link_sources/test_base.py +++ b/tests/repositories/link_sources/test_base.py @@ -1,7 +1,7 @@ from __future__ import annotations +from collections import defaultdict from typing import TYPE_CHECKING -from unittest.mock import PropertyMock import pytest @@ -18,25 +18,32 @@ from pytest_mock import MockerFixture + from poetry.repositories.link_sources.base import LinkCache + @pytest.fixture def link_source(mocker: MockerFixture) -> LinkSource: url = "https://example.org" - link_source = LinkSource(url) - mocker.patch( - f"{LinkSource.__module__}.{LinkSource.__qualname__}.links", - new_callable=PropertyMock, - return_value=iter( - [ - Link(f"{url}/demo-0.1.0.tar.gz"), - Link(f"{url}/demo-0.1.0_invalid.tar.gz"), - Link(f"{url}/invalid.tar.gz"), - Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"), - Link(f"{url}/demo-0.1.1.tar.gz"), - ] - ), - ) - return link_source + + class LinkSourceMock(LinkSource): + def _get_link_cache(self) -> LinkCache: + return defaultdict( + lambda: defaultdict(list), + { + canonicalize_name("demo"): defaultdict( + list, + { + Version.parse("0.1.0"): [ + Link(f"{url}/demo-0.1.0.tar.gz"), + Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"), + ], + Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")], + }, + ), + }, + ) + + return LinkSourceMock(url) @pytest.mark.parametrize( @@ -63,7 +70,7 @@ def test_link_package_data(filename: str, expected: Package | None) -> None: ], ) def test_versions(name: str, expected: set[Version], link_source: LinkSource) -> None: - assert set(link_source.versions(name)) == expected + assert set(link_source.versions(canonicalize_name(name))) == expected def test_packages(link_source: LinkSource) -> None: diff --git a/tests/repositories/link_sources/test_html.py b/tests/repositories/link_sources/test_html.py index 35b9ebfaa88..780262d9ada 100644 --- a/tests/repositories/link_sources/test_html.py +++ b/tests/repositories/link_sources/test_html.py @@ -2,6 +2,7 @@ import pytest +from packaging.utils import canonicalize_name from poetry.core.packages.utils.link import Link from poetry.core.semver.version import Version @@ -90,4 +91,4 @@ def test_yanked(yanked_attrs: tuple[str, str], expected: bool | str) -> None: content = DEMO_TEMPLATE.format(anchors) page = HTMLPage("https://example.org", content) - assert page.yanked("demo", Version.parse("0.1")) == expected + assert page.yanked(canonicalize_name("demo"), Version.parse("0.1")) == expected diff --git a/tests/repositories/test_legacy_repository.py b/tests/repositories/test_legacy_repository.py index 061471409d9..042e8064a52 100644 --- a/tests/repositories/test_legacy_repository.py +++ b/tests/repositories/test_legacy_repository.py @@ -102,25 +102,12 @@ def test_page_invalid_version_link() -> None: assert page is not None links = list(page.links) - assert len(links) == 2 + assert len(links) == 1 - versions = list(page.versions("poetry")) + versions = list(page.versions(canonicalize_name("poetry"))) assert len(versions) == 1 assert versions[0].to_string() == "0.1.0" - invalid_link = None - - for link in links: - if link.filename.startswith("poetry-21"): - invalid_link = link - break - - links_010 = list(page.links_for_version(canonicalize_name("poetry"), versions[0])) - assert invalid_link not in links_010 - - assert invalid_link - assert not page.link_package_data(invalid_link) - packages = list(page.packages) assert len(packages) == 1 assert packages[0].name == "poetry" From e521873e038b8c55dac2fb223cc51fe2d9442916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Randy=20D=C3=B6ring?= <30527984+radoering@users.noreply.github.com> Date: Thu, 8 Sep 2022 21:00:57 +0200 Subject: [PATCH 2/4] refactor(links_sources): use cached_property --- poetry.lock | 14 ++++++- pyproject.toml | 1 + src/poetry/repositories/link_sources/base.py | 10 +---- src/poetry/repositories/link_sources/html.py | 4 +- src/poetry/utils/_compat.py | 16 ++++++- tests/repositories/link_sources/test_base.py | 44 ++++++++++---------- 6 files changed, 56 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 7f8dd525f4b..35e64522660 100644 --- a/poetry.lock +++ b/poetry.lock @@ -20,6 +20,14 @@ docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] tests_no_zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] +[[package]] +name = "backports.cached-property" +version = "1.0.2" +description = "cached_property() - computed once per instance, cached as attribute" +category = "main" +optional = false +python-versions = ">=3.6.0" + [[package]] name = "CacheControl" version = "0.12.11" @@ -931,7 +939,7 @@ testing = ["func-timeout", "jaraco.itertools", "pytest (>=6)", "pytest-black (>= [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "3810aef128102dea7cd2797cfcee77f1345831ed057aac5a962916a76e29fb6a" +content-hash = "7e40cde2399843d53e715f657b703397ce236196663788ada896d7e2a53c869a" [metadata.files] atomicwrites = [ @@ -941,6 +949,10 @@ attrs = [ {file = "attrs-22.1.0-py2.py3-none-any.whl", hash = "sha256:86efa402f67bf2df34f51a335487cf46b1ec130d02b8d39fd248abfd30da551c"}, {file = "attrs-22.1.0.tar.gz", hash = "sha256:29adc2665447e5191d0e7c568fde78b21f9672d344281d0c6e1ab085429b22b6"}, ] +"backports.cached-property" = [ + {file = "backports.cached-property-1.0.2.tar.gz", hash = "sha256:9306f9eed6ec55fd156ace6bc1094e2c86fae5fb2bf07b6a9c00745c656e75dd"}, + {file = "backports.cached_property-1.0.2-py3-none-any.whl", hash = "sha256:baeb28e1cd619a3c9ab8941431fe34e8490861fb998c6c4590693d50171db0cc"}, +] CacheControl = [ {file = "CacheControl-0.12.11-py2.py3-none-any.whl", hash = "sha256:2c75d6a8938cb1933c75c50184549ad42728a27e9f6b92fd677c3151aa72555b"}, {file = "CacheControl-0.12.11.tar.gz", hash = "sha256:a5b9fcc986b184db101aa280b42ecdcdfc524892596f606858e0b7a8b4d9e144"}, diff --git a/pyproject.toml b/pyproject.toml index 5f7bbde2e52..8d95d3811c6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -46,6 +46,7 @@ python = "^3.7" poetry-core = "^1.1.0" poetry-plugin-export = "^1.0.6" +"backports.cached-property" = { version = "^1.0.2", python = "<3.8" } cachecontrol = { version = "^0.12.9", extras = ["filecache"] } cachy = "^0.3.0" cleo = "^1.0.0a5" diff --git a/src/poetry/repositories/link_sources/base.py b/src/poetry/repositories/link_sources/base.py index 0e5aa289800..fb6dc67b6b3 100644 --- a/src/poetry/repositories/link_sources/base.py +++ b/src/poetry/repositories/link_sources/base.py @@ -1,6 +1,5 @@ from __future__ import annotations -import functools import logging import re @@ -12,6 +11,7 @@ from poetry.core.packages.package import Package from poetry.core.semver.version import Version +from poetry.utils._compat import cached_property from poetry.utils.patterns import sdist_file_re from poetry.utils.patterns import wheel_file_re @@ -43,9 +43,6 @@ class LinkSource: def __init__(self, url: str) -> None: self._url = url - self._get_link_cache_wrapper = functools.lru_cache(maxsize=1)( - self._get_link_cache - ) @property def url(self) -> str: @@ -124,9 +121,6 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool: return "\n".join(sorted(reasons)) return True - @property + @cached_property def _link_cache(self) -> LinkCache: - return self._get_link_cache_wrapper() - - def _get_link_cache(self) -> LinkCache: raise NotImplementedError() diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index aa1c700e879..f9dc890663f 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -10,6 +10,7 @@ from poetry.core.packages.utils.link import Link from poetry.repositories.link_sources.base import LinkSource +from poetry.utils._compat import cached_property if TYPE_CHECKING: @@ -30,7 +31,8 @@ def __init__(self, url: str, content: str) -> None: self._parsed = html5lib.parse(content, namespaceHTMLElements=False) - def _get_link_cache(self) -> LinkCache: + @cached_property + def _link_cache(self) -> LinkCache: links: defaultdict[ NormalizedName, defaultdict[Version, list[Link]] ] = defaultdict(lambda: defaultdict(list)) diff --git a/src/poetry/utils/_compat.py b/src/poetry/utils/_compat.py index a1c81582366..9ee8875f061 100644 --- a/src/poetry/utils/_compat.py +++ b/src/poetry/utils/_compat.py @@ -13,6 +13,12 @@ else: from importlib import metadata +if sys.version_info < (3, 8): + # compatibility for python <3.8 + from backports.cached_property import cached_property +else: + from functools import cached_property + WINDOWS = sys.platform == "win32" @@ -53,4 +59,12 @@ def list_to_shell_command(cmd: list[str]) -> str: ) -__all__ = ["WINDOWS", "decode", "encode", "list_to_shell_command", "metadata", "to_str"] +__all__ = [ + "WINDOWS", + "cached_property", + "decode", + "encode", + "list_to_shell_command", + "metadata", + "to_str", +] diff --git a/tests/repositories/link_sources/test_base.py b/tests/repositories/link_sources/test_base.py index 394ccf7e6c8..ef400a716d7 100644 --- a/tests/repositories/link_sources/test_base.py +++ b/tests/repositories/link_sources/test_base.py @@ -2,6 +2,7 @@ from collections import defaultdict from typing import TYPE_CHECKING +from unittest.mock import PropertyMock import pytest @@ -18,32 +19,31 @@ from pytest_mock import MockerFixture - from poetry.repositories.link_sources.base import LinkCache - @pytest.fixture def link_source(mocker: MockerFixture) -> LinkSource: url = "https://example.org" - - class LinkSourceMock(LinkSource): - def _get_link_cache(self) -> LinkCache: - return defaultdict( - lambda: defaultdict(list), - { - canonicalize_name("demo"): defaultdict( - list, - { - Version.parse("0.1.0"): [ - Link(f"{url}/demo-0.1.0.tar.gz"), - Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"), - ], - Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")], - }, - ), - }, - ) - - return LinkSourceMock(url) + link_source = LinkSource(url) + mocker.patch( + f"{LinkSource.__module__}.{LinkSource.__qualname__}._link_cache", + new_callable=PropertyMock, + return_value=defaultdict( + lambda: defaultdict(list), + { + canonicalize_name("demo"): defaultdict( + list, + { + Version.parse("0.1.0"): [ + Link(f"{url}/demo-0.1.0.tar.gz"), + Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"), + ], + Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")], + }, + ), + }, + ), + ) + return link_source @pytest.mark.parametrize( From 9403cae05afc2eb38a1fc29a8e3451e65b23f415 Mon Sep 17 00:00:00 2001 From: Bjorn Neergaard Date: Thu, 8 Sep 2022 14:26:06 -0600 Subject: [PATCH 3/4] Update src/poetry/repositories/link_sources/html.py --- src/poetry/repositories/link_sources/html.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index f9dc890663f..f4dd0ce3dca 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -33,9 +33,7 @@ def __init__(self, url: str, content: str) -> None: @cached_property def _link_cache(self) -> LinkCache: - links: defaultdict[ - NormalizedName, defaultdict[Version, list[Link]] - ] = defaultdict(lambda: defaultdict(list)) + links: LinkCache = defaultdict(lambda: defaultdict(list)) for anchor in self._parsed.findall(".//a"): if anchor.get("href"): href = anchor.get("href") From d476bb9471d744908edbbeb68a63f28fe9d67114 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 9 Sep 2022 05:04:05 +0000 Subject: [PATCH 4/4] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- src/poetry/repositories/link_sources/html.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/poetry/repositories/link_sources/html.py b/src/poetry/repositories/link_sources/html.py index f4dd0ce3dca..5ea4eff78d0 100644 --- a/src/poetry/repositories/link_sources/html.py +++ b/src/poetry/repositories/link_sources/html.py @@ -14,9 +14,6 @@ if TYPE_CHECKING: - from packaging.utils import NormalizedName - from poetry.core.semver.version import Version - from poetry.repositories.link_sources.base import LinkCache