Skip to content

Commit

Permalink
performance(legacy_repository): introduce link cache to improve performance for legacy repositories
Browse files Browse the repository at this point in the history

Co-authored-by: Jarrod Moore <jmo@jmo.name>
  • Loading branch information
2 people authored and neersighted committed Sep 8, 2022
1 parent 88ba18d commit cf441e7
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 37 deletions.
38 changes: 21 additions & 17 deletions src/poetry/repositories/link_sources/base.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
from __future__ import annotations

import functools
import logging
import re

from abc import abstractmethod
from typing import TYPE_CHECKING

from packaging.utils import canonicalize_name
Expand All @@ -15,6 +15,7 @@


if TYPE_CHECKING:
from collections import defaultdict
from collections.abc import Iterator

from packaging.utils import NormalizedName
Expand All @@ -39,21 +40,16 @@ class LinkSource:

def __init__(self, url: str) -> None:
    """Store the source URL and create the cached link-cache accessor."""
    self._url = url
    # Wrap the bound ``_get_link_cache`` in a single-entry LRU cache kept on
    # this instance, so the result of the first call to the wrapper is reused
    # by later calls instead of being recomputed.
    self._get_link_cache_wrapper = functools.lru_cache(maxsize=1)(
        self._get_link_cache
    )

@property
def url(self) -> str:
    """Return the URL that was passed to the constructor."""
    return self._url

def versions(self, name: str) -> Iterator[Version]:
name = canonicalize_name(name)
seen: set[Version] = set()

for link in self.links:
pkg = self.link_package_data(link)

if pkg and pkg.name == name and pkg.version not in seen:
seen.add(pkg.version)
yield pkg.version
yield from self._link_cache[canonicalize_name(name)]

@property
def packages(self) -> Iterator[Package]:
Expand All @@ -64,9 +60,10 @@ def packages(self) -> Iterator[Package]:
yield pkg

@property
@abstractmethod
def links(self) -> Iterator[Link]:
raise NotImplementedError()
for links_per_version in self._link_cache.values():
for links in links_per_version.values():
yield from links

@classmethod
def link_package_data(cls, link: Link) -> Package | None:
Expand Down Expand Up @@ -102,11 +99,7 @@ def link_package_data(cls, link: Link) -> Package | None:
def links_for_version(
self, name: NormalizedName, version: Version
) -> Iterator[Link]:
for link in self.links:
pkg = self.link_package_data(link)

if pkg and pkg.name == name and pkg.version == version:
yield link
yield from self._link_cache[name][version]

def clean_link(self, url: str) -> str:
"""Makes sure a link is fully encoded. That is, if a ' ' shows up in
Expand All @@ -127,3 +120,14 @@ def yanked(self, name: NormalizedName, version: Version) -> str | bool:
if reasons:
return "\n".join(sorted(reasons))
return True

@property
def _link_cache(
    self,
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
    """Return the links grouped by package name and then by version.

    Delegates to ``self._get_link_cache_wrapper``, the ``lru_cache``-wrapped
    ``_get_link_cache`` created in ``__init__``.
    """
    return self._get_link_cache_wrapper()

def _get_link_cache(
    self,
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
    """Build the ``name -> version -> links`` mapping for this source.

    The base implementation always raises ``NotImplementedError``; concrete
    link sources provide their own implementation.

    Raises:
        NotImplementedError: always, when not overridden.
    """
    raise NotImplementedError()
23 changes: 19 additions & 4 deletions src/poetry/repositories/link_sources/html.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,20 @@
import urllib.parse
import warnings

from collections import defaultdict
from html import unescape
from typing import TYPE_CHECKING

from packaging.utils import canonicalize_name
from poetry.core.packages.utils.link import Link
from poetry.core.semver.version import Version

from poetry.repositories.link_sources.base import LinkSource


if TYPE_CHECKING:
from collections.abc import Iterator
from packaging.utils import NormalizedName


with warnings.catch_warnings():
warnings.simplefilter("ignore")
Expand All @@ -25,8 +29,12 @@ def __init__(self, url: str, content: str) -> None:

self._parsed = html5lib.parse(content, namespaceHTMLElements=False)

@property
def links(self) -> Iterator[Link]:
def _get_link_cache(
self,
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
links: defaultdict[
NormalizedName, defaultdict[Version, list[Link]]
] = defaultdict(lambda: defaultdict(list))
for anchor in self._parsed.findall(".//a"):
if anchor.get("href"):
href = anchor.get("href")
Expand All @@ -44,7 +52,14 @@ def links(self) -> Iterator[Link]:
if link.ext not in self.SUPPORTED_FORMATS:
continue

yield link
pkg = self.link_package_data(link)
if pkg:
links[pkg.name][pkg.version].append(link)
else:
# dummy name and version
links[canonicalize_name("")][Version.parse("0")].append(link)

return links


class SimpleRepositoryPage(HTMLPage):
Expand Down
49 changes: 33 additions & 16 deletions tests/repositories/link_sources/test_base.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

from collections import defaultdict
from typing import TYPE_CHECKING
from unittest.mock import PropertyMock

import pytest

from packaging.utils import NormalizedName
from packaging.utils import canonicalize_name
from poetry.core.packages.package import Package
from poetry.core.packages.utils.link import Link
Expand All @@ -22,21 +23,37 @@
@pytest.fixture
def link_source(mocker: MockerFixture) -> LinkSource:
url = "https://example.org"
link_source = LinkSource(url)
mocker.patch(
f"{LinkSource.__module__}.{LinkSource.__qualname__}.links",
new_callable=PropertyMock,
return_value=iter(
[
Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
Link(f"{url}/invalid.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
Link(f"{url}/demo-0.1.1.tar.gz"),
]
),
)
return link_source

class LinkSourceMock(LinkSource):
def _get_link_cache(
self,
) -> defaultdict[NormalizedName, defaultdict[Version, list[Link]]]:
return defaultdict(
lambda: defaultdict(list),
{
canonicalize_name("demo"): defaultdict(
list,
{
Version.parse("0.1.0"): [
Link(f"{url}/demo-0.1.0.tar.gz"),
Link(f"{url}/demo-0.1.0-py2.py3-none-any.whl"),
],
Version.parse("0.1.1"): [Link(f"{url}/demo-0.1.1.tar.gz")],
},
),
canonicalize_name(""): defaultdict(
list,
{
Version.parse("0"): [
Link(f"{url}/demo-0.1.0_invalid.tar.gz"),
Link(f"{url}/invalid.tar.gz"),
]
},
),
},
)

return LinkSourceMock(url)


@pytest.mark.parametrize(
Expand Down

0 comments on commit cf441e7

Please sign in to comment.