Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Populate find_all_candidates cache from threadpool #10480

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions news/10480.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Parallelize network requests when finding package candidates for installation.
17 changes: 16 additions & 1 deletion src/pip/_internal/resolution/resolvelib/resolver.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import functools
import logging
import os
from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple, cast
from multiprocessing.pool import ThreadPool
from typing import TYPE_CHECKING, Dict, Iterable, List, Optional, Set, Tuple, cast

from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib import BaseReporter, ResolutionImpossible
Expand Down Expand Up @@ -66,6 +67,7 @@ def __init__(
self.ignore_dependencies = ignore_dependencies
self.upgrade_strategy = upgrade_strategy
self._result: Optional[Result] = None
self._finder = finder

def resolve(
self, root_reqs: List[InstallRequirement], check_supported_wheels: bool
Expand All @@ -87,6 +89,8 @@ def resolve(
reporter,
)

self._prime_finder_cache(provider.identify(r) for r in collected.requirements)

try:
limit_how_complex_resolution_can_be = 200000
result = self._result = resolver.resolve(
Expand Down Expand Up @@ -164,6 +168,17 @@ def resolve(
req.needs_more_preparation = False
return req_set

def _prime_finder_cache(self, project_names: Iterable[str]) -> None:
    """Populate the finder's ``find_all_candidates`` cache.

    Pre-emptively call the finder's ``find_all_candidates`` for each project
    in parallel, so that resolution does not later block on these lookups
    one at a time.

    :param project_names: Names of the projects to look up. Duplicate names
        are collapsed so that each project is requested only once.
    :raises Exception: Whatever ``find_all_candidates`` raises in a worker
        thread is re-raised here while the results are being drained.
    """
    # A memoizing cache does not stop two threads that miss on the same key
    # at the same time from both doing the work, so drop duplicates before
    # fanning out. dict.fromkeys keeps the first-seen order.
    unique_names = list(dict.fromkeys(project_names))
    if not unique_names:
        # Nothing to fetch; avoid starting worker threads for no work.
        return

    with ThreadPool() as pool:
        # The return values are not needed, only the side effect of filling
        # the cache. Iterating to exhaustion waits for every lookup to finish
        # before the pool is torn down.
        for _ in pool.imap_unordered(
            self._finder.find_all_candidates, unique_names
        ):
            pass

def get_installation_order(
self, req_set: RequirementSet
) -> List[InstallRequirement]:
Expand Down
30 changes: 30 additions & 0 deletions tests/unit/resolution_resolvelib/test_resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@
from unittest import mock

import pytest
from pip._vendor.packaging.requirements import Requirement
from pip._vendor.packaging.utils import canonicalize_name
from pip._vendor.resolvelib.resolvers import Result
from pip._vendor.resolvelib.structs import DirectedGraph

from pip._internal.index.package_finder import PackageFinder
from pip._internal.operations.prepare import RequirementPreparer
from pip._internal.req import InstallRequirement
from pip._internal.req.constructors import install_req_from_line
from pip._internal.req.req_set import RequirementSet
from pip._internal.resolution.resolvelib.resolver import (
Expand All @@ -30,6 +32,7 @@ def resolver(preparer: RequirementPreparer, finder: PackageFinder) -> Resolver:
force_reinstall=False,
upgrade_strategy="to-satisfy-only",
)
finder.find_all_candidates.cache_clear()
return resolver


Expand Down Expand Up @@ -297,3 +300,30 @@ def test_new_resolver_topological_weights(

weights = get_topological_weights(graph, requirement_keys)
assert weights == expected_weights


def test_resolver_cache_population(resolver: Resolver) -> None:
    """Check that resolving primes the finder's ``find_all_candidates`` cache."""
    tracked_fields = ["currsize", "hits", "misses"]

    def get_findall_cacheinfo() -> Dict[str, int]:
        # Snapshot only the cache counters this test asserts on.
        stats = resolver._finder.find_all_candidates.cache_info()
        return {field: getattr(stats, field) for field in tracked_fields}

    # Nothing has been looked up yet, so every counter starts at zero.
    assert get_findall_cacheinfo() == {"currsize": 0, "hits": 0, "misses": 0}

    # Priming on its own records a single miss and stores a single entry.
    resolver._prime_finder_cache(["simple"])
    assert get_findall_cacheinfo() == {"currsize": 1, "hits": 0, "misses": 1}

    # Start over from an empty cache before running a real resolution.
    resolver._finder.find_all_candidates.cache_clear()

    requirement = Requirement("simple==3.0")
    simple_req = InstallRequirement(req=requirement, comes_from=None)
    resolver.resolve([simple_req], True)

    # One entry, one hit, one miss: the priming step populated the cache
    # (the miss) and the resolution itself was then served from it (the hit).
    assert get_findall_cacheinfo() == {"currsize": 1, "hits": 1, "misses": 1}
30 changes: 29 additions & 1 deletion tests/unit/test_finder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import logging
from typing import Iterable
from typing import Dict, Iterable
from unittest.mock import Mock, patch

import pytest
Expand Down Expand Up @@ -566,3 +566,31 @@ def test_find_all_candidates_find_links_and_index(data: TestData) -> None:
versions = finder.find_all_candidates("simple")
# first the find-links versions then the page versions
assert [str(v.version) for v in versions] == ["3.0", "2.0", "1.0", "1.0"]


def test_finder_caching(data: TestData) -> None:
    """Check that ``find_best_candidate`` reuses the ``find_all_candidates`` cache."""
    # The finder does not need this for its own behavior. The resolver relies
    # on find_best_candidate consuming the finder's own find_all_candidates
    # cache, so that the cache can be pre-populated before the resolution
    # process starts.
    finder = make_test_finder(
        find_links=[data.find_links],
        index_urls=[data.index_url("simple")],
    )
    finder.find_all_candidates.cache_clear()

    tracked_fields = ["currsize", "hits", "misses"]

    def get_findall_cacheinfo() -> Dict[str, int]:
        # Snapshot only the cache counters this test asserts on.
        stats = finder.find_all_candidates.cache_info()
        return {field: getattr(stats, field) for field in tracked_fields}

    # Freshly cleared: every counter is zero.
    assert get_findall_cacheinfo() == {"currsize": 0, "hits": 0, "misses": 0}

    # The first find-all lookup has to miss.
    finder.find_all_candidates("simple")
    assert get_findall_cacheinfo() == {"currsize": 1, "hits": 0, "misses": 1}

    # A find-best lookup that follows is answered from the cache.
    finder.find_best_candidate("simple")
    assert get_findall_cacheinfo() == {"currsize": 1, "hits": 1, "misses": 1}