From 47a8480065d4eac252edb3673cfaecccb7dbf3c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?St=C3=A9phane=20Bidoul?= Date: Fri, 29 Sep 2023 22:11:12 +0200 Subject: [PATCH] Upgrade vendored packaging lib --- news/packaging.vendor.rst | 1 + src/pip/_internal/commands/check.py | 2 - src/pip/_internal/commands/download.py | 1 - src/pip/_internal/commands/index.py | 6 +- src/pip/_internal/commands/install.py | 3 - src/pip/_internal/commands/list.py | 4 +- src/pip/_internal/commands/wheel.py | 1 - src/pip/_internal/metadata/base.py | 6 +- .../_internal/metadata/importlib/_dists.py | 4 +- src/pip/_internal/metadata/pkg_resources.py | 4 +- src/pip/_internal/operations/check.py | 44 +- src/pip/_internal/req/req_set.py | 37 - .../_internal/resolution/resolvelib/base.py | 7 +- .../resolution/resolvelib/candidates.py | 16 +- .../resolution/resolvelib/factory.py | 11 +- src/pip/_internal/self_outdated_check.py | 4 +- src/pip/_vendor/packaging/__about__.py | 26 - src/pip/_vendor/packaging/__init__.py | 30 +- src/pip/_vendor/packaging/_elffile.py | 108 +++ src/pip/_vendor/packaging/_manylinux.py | 211 ++-- src/pip/_vendor/packaging/_musllinux.py | 89 +- src/pip/_vendor/packaging/_parser.py | 359 +++++++ src/pip/_vendor/packaging/_tokenizer.py | 192 ++++ src/pip/_vendor/packaging/markers.py | 204 ++-- src/pip/_vendor/packaging/metadata.py | 822 ++++++++++++++++ src/pip/_vendor/packaging/requirements.py | 154 +-- src/pip/_vendor/packaging/specifiers.py | 918 +++++++++++------- src/pip/_vendor/packaging/tags.py | 94 +- src/pip/_vendor/packaging/utils.py | 50 +- src/pip/_vendor/packaging/version.py | 395 ++++---- src/pip/_vendor/vendor.txt | 2 +- 31 files changed, 2662 insertions(+), 1143 deletions(-) create mode 100644 news/packaging.vendor.rst delete mode 100644 src/pip/_vendor/packaging/__about__.py create mode 100644 src/pip/_vendor/packaging/_elffile.py create mode 100644 src/pip/_vendor/packaging/_parser.py create mode 100644 src/pip/_vendor/packaging/_tokenizer.py create mode 100644 src/pip/_vendor/packaging/metadata.py diff --git a/news/packaging.vendor.rst b/news/packaging.vendor.rst new file mode 100644 index 00000000000..1834f6df449 --- /dev/null +++ b/news/packaging.vendor.rst @@ -0,0 +1 @@ +Upgrade packaging to 23.2 diff --git a/src/pip/_internal/commands/check.py b/src/pip/_internal/commands/check.py index 5efd0a34160..584df9f55c5 100644 --- a/src/pip/_internal/commands/check.py +++ b/src/pip/_internal/commands/check.py @@ -7,7 +7,6 @@ from pip._internal.operations.check import ( check_package_set, create_package_set_from_installed, - warn_legacy_versions_and_specifiers, ) from pip._internal.utils.misc import write_output @@ -22,7 +21,6 @@ class CheckCommand(Command): def run(self, options: Values, args: List[str]) -> int: package_set, parsing_probs = create_package_set_from_installed() - warn_legacy_versions_and_specifiers(package_set) missing, conflicting = check_package_set(package_set) for project_name in missing: diff --git a/src/pip/_internal/commands/download.py b/src/pip/_internal/commands/download.py index 54247a78a65..917bbb91d83 100644 --- a/src/pip/_internal/commands/download.py +++ b/src/pip/_internal/commands/download.py @@ -139,7 +139,6 @@ def run(self, options: Values, args: List[str]) -> int: downloaded.append(req.name) preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - requirement_set.warn_legacy_versions_and_specifiers() if downloaded: write_output("Successfully downloaded %s", " ".join(downloaded)) diff --git a/src/pip/_internal/commands/index.py b/src/pip/_internal/commands/index.py index f55e9e49974..2e2661bba71 100644 --- a/src/pip/_internal/commands/index.py +++ b/src/pip/_internal/commands/index.py @@ -1,8 +1,8 @@ import logging from optparse import Values -from typing import Any, Iterable, List, Optional, Union +from typing import Any, Iterable, List, Optional -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -115,7 +115,7 @@ def get_available_package_versions(self, options: Values, args: List[Any]) -> No ignore_requires_python=options.ignore_requires_python, ) - versions: Iterable[Union[LegacyVersion, Version]] = ( + versions: Iterable[Version] = ( candidate.version for candidate in finder.find_all_candidates(query) ) diff --git a/src/pip/_internal/commands/install.py b/src/pip/_internal/commands/install.py index 6cf7571e4a6..a9e83f9d8cd 100644 --- a/src/pip/_internal/commands/install.py +++ b/src/pip/_internal/commands/install.py @@ -387,9 +387,6 @@ def run(self, options: Values, args: List[str]) -> int: json.dump(report.to_dict(), f, indent=2, ensure_ascii=False) if options.dry_run: - # In non dry-run mode, the legacy versions and specifiers check - # will be done as part of conflict detection. - requirement_set.warn_legacy_versions_and_specifiers() would_install_items = sorted( (r.metadata["name"], r.metadata["version"]) for r in requirement_set.requirements_to_install diff --git a/src/pip/_internal/commands/list.py b/src/pip/_internal/commands/list.py index f510919910e..171461aebaf 100644 --- a/src/pip/_internal/commands/list.py +++ b/src/pip/_internal/commands/list.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Generator, List, Optional, Sequence, Tuple, cast from pip._vendor.packaging.utils import canonicalize_name +from pip._vendor.packaging.version import Version from pip._internal.cli import cmdoptions from pip._internal.cli.req_command import IndexGroupCommand @@ -18,7 +19,6 @@ from pip._internal.utils.misc import tabulate, write_output if TYPE_CHECKING: - from pip._internal.metadata.base import DistributionVersion class _DistWithLatestInfo(BaseDistribution): """Give the distribution object a couple of extra fields. @@ -27,7 +27,7 @@ class _DistWithLatestInfo(BaseDistribution): makes the rest of the code much cleaner. """ - latest_version: DistributionVersion + latest_version: Version latest_filetype: str _ProcessedDists = Sequence[_DistWithLatestInfo] diff --git a/src/pip/_internal/commands/wheel.py b/src/pip/_internal/commands/wheel.py index ed578aa2500..278719f4e0c 100644 --- a/src/pip/_internal/commands/wheel.py +++ b/src/pip/_internal/commands/wheel.py @@ -154,7 +154,6 @@ def run(self, options: Values, args: List[str]) -> int: reqs_to_build.append(req) preparer.prepare_linked_requirements_more(requirement_set.requirements.values()) - requirement_set.warn_legacy_versions_and_specifiers() # build wheels build_successes, build_failures = build( diff --git a/src/pip/_internal/metadata/base.py b/src/pip/_internal/metadata/base.py index af0412c819c..843a4e60aeb 100644 --- a/src/pip/_internal/metadata/base.py +++ b/src/pip/_internal/metadata/base.py @@ -25,7 +25,7 @@ from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.specifiers import InvalidSpecifier, SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.exceptions import NoneMetadataError from pip._internal.locations import site_packages, user_site @@ -41,8 +41,6 @@ from ._json import msg_to_json -DistributionVersion = Union[LegacyVersion, Version] - InfoPath = Union[str, pathlib.PurePath] logger = logging.getLogger(__name__) @@ -274,7 +272,7 @@ def canonical_name(self) -> NormalizedName: raise NotImplementedError() @property - def version(self) -> DistributionVersion: + def version(self) -> Version: raise NotImplementedError() @property diff --git a/src/pip/_internal/metadata/importlib/_dists.py b/src/pip/_internal/metadata/importlib/_dists.py index 8591029f16e..9eee474ea5b 100644 --- a/src/pip/_internal/metadata/importlib/_dists.py +++ b/src/pip/_internal/metadata/importlib/_dists.py @@ -16,13 +16,13 @@ from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import InvalidWheel, UnsupportedWheel from pip._internal.metadata.base import ( BaseDistribution, BaseEntryPoint, - DistributionVersion, InfoPath, Wheel, ) @@ -171,7 +171,7 @@ def canonical_name(self) -> NormalizedName: return canonicalize_name(name) @property - def version(self) -> DistributionVersion: + def version(self) -> Version: return parse_version(self._dist.version) def is_file(self, path: InfoPath) -> bool: diff --git a/src/pip/_internal/metadata/pkg_resources.py b/src/pip/_internal/metadata/pkg_resources.py index bb11e5bd8a5..57a9fdba1bc 100644 --- a/src/pip/_internal/metadata/pkg_resources.py +++ b/src/pip/_internal/metadata/pkg_resources.py @@ -8,6 +8,7 @@ from pip._vendor import pkg_resources from pip._vendor.packaging.requirements import Requirement from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._internal.exceptions import InvalidWheel, NoneMetadataError, UnsupportedWheel @@ -19,7 +20,6 @@ BaseDistribution, BaseEntryPoint, BaseEnvironment, - DistributionVersion, InfoPath, Wheel, ) @@ -168,7 +168,7 @@ def canonical_name(self) -> NormalizedName: return canonicalize_name(self._dist.project_name) @property - def version(self) -> DistributionVersion: + def version(self) -> Version: return parse_version(self._dist.version) def is_file(self, path: InfoPath) -> bool: diff --git a/src/pip/_internal/operations/check.py b/src/pip/_internal/operations/check.py index 90c6a58a55e..ee1a5bcfe06 100644 --- a/src/pip/_internal/operations/check.py +++ b/src/pip/_internal/operations/check.py @@ -5,28 +5,25 @@ from typing import Callable, Dict, List, NamedTuple, Optional, Set, Tuple from pip._vendor.packaging.requirements import Requirement -from pip._vendor.packaging.specifiers import LegacySpecifier from pip._vendor.packaging.utils import NormalizedName, canonicalize_name -from pip._vendor.packaging.version import LegacyVersion +from pip._vendor.packaging.version import Version from pip._internal.distributions import make_distribution_for_install_requirement from pip._internal.metadata import get_default_environment -from pip._internal.metadata.base import DistributionVersion from pip._internal.req.req_install import InstallRequirement -from pip._internal.utils.deprecation import deprecated logger = logging.getLogger(__name__) class PackageDetails(NamedTuple): - version: DistributionVersion + version: Version dependencies: List[Requirement] # Shorthands PackageSet = Dict[NormalizedName, PackageDetails] Missing = Tuple[NormalizedName, Requirement] -Conflicting = Tuple[NormalizedName, DistributionVersion, Requirement] +Conflicting = Tuple[NormalizedName, Version, Requirement] MissingDict = Dict[NormalizedName, List[Missing]] ConflictingDict = Dict[NormalizedName, List[Conflicting]] @@ -60,8 +57,6 @@ def check_package_set( package name and returns a boolean. """ - warn_legacy_versions_and_specifiers(package_set) - missing = {} conflicting = {} @@ -152,36 +147,3 @@ def _create_whitelist( break return packages_affected - - -def warn_legacy_versions_and_specifiers(package_set: PackageSet) -> None: - for project_name, package_details in package_set.items(): - if isinstance(package_details.version, LegacyVersion): - deprecated( - reason=( - f"{project_name} {package_details.version} " - f"has a non-standard version number." - ), - replacement=( - f"to upgrade to a newer version of {project_name} " - f"or contact the author to suggest that they " - f"release a version with a conforming version number" - ), - issue=12063, - gone_in="24.1", - ) - for dep in package_details.dependencies: - if any(isinstance(spec, LegacySpecifier) for spec in dep.specifier): - deprecated( - reason=( - f"{project_name} {package_details.version} " - f"has a non-standard dependency specifier {dep}." - ), - replacement=( - f"to upgrade to a newer version of {project_name} " - f"or contact the author to suggest that they " - f"release a version with a conforming dependency specifiers" - ), - issue=12063, - gone_in="24.1", - ) diff --git a/src/pip/_internal/req/req_set.py b/src/pip/_internal/req/req_set.py index bf36114e802..ec7a6e07a25 100644 --- a/src/pip/_internal/req/req_set.py +++ b/src/pip/_internal/req/req_set.py @@ -2,12 +2,9 @@ from collections import OrderedDict from typing import Dict, List -from pip._vendor.packaging.specifiers import LegacySpecifier from pip._vendor.packaging.utils import canonicalize_name -from pip._vendor.packaging.version import LegacyVersion from pip._internal.req.req_install import InstallRequirement -from pip._internal.utils.deprecation import deprecated logger = logging.getLogger(__name__) @@ -83,37 +80,3 @@ def requirements_to_install(self) -> List[InstallRequirement]: for install_req in self.all_requirements if not install_req.constraint and not install_req.satisfied_by ] - - def warn_legacy_versions_and_specifiers(self) -> None: - for req in self.requirements_to_install: - version = req.get_dist().version - if isinstance(version, LegacyVersion): - deprecated( - reason=( - f"pip has selected the non standard version {version} " - f"of {req}. In the future this version will be " - f"ignored as it isn't standard compliant." - ), - replacement=( - "set or update constraints to select another version " - "or contact the package author to fix the version number" - ), - issue=12063, - gone_in="24.1", - ) - for dep in req.get_dist().iter_dependencies(): - if any(isinstance(spec, LegacySpecifier) for spec in dep.specifier): - deprecated( - reason=( - f"pip has selected {req} {version} which has non " - f"standard dependency specifier {dep}. " - f"In the future this version of {req} will be " - f"ignored as it isn't standard compliant." - ), - replacement=( - "set or update constraints to select another version " - "or contact the package author to fix the version number" - ), - issue=12063, - gone_in="24.1", - ) diff --git a/src/pip/_internal/resolution/resolvelib/base.py b/src/pip/_internal/resolution/resolvelib/base.py index 4269c7fbecc..0f31dc9b307 100644 --- a/src/pip/_internal/resolution/resolvelib/base.py +++ b/src/pip/_internal/resolution/resolvelib/base.py @@ -1,16 +1,15 @@ from dataclasses import dataclass -from typing import FrozenSet, Iterable, Optional, Tuple, Union +from typing import FrozenSet, Iterable, Optional, Tuple from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName -from pip._vendor.packaging.version import LegacyVersion, Version +from pip._vendor.packaging.version import Version from pip._internal.models.link import Link, links_equivalent from pip._internal.req.req_install import InstallRequirement from pip._internal.utils.hashes import Hashes CandidateLookup = Tuple[Optional["Candidate"], Optional[InstallRequirement]] -CandidateVersion = Union[LegacyVersion, Version] def format_name(project: NormalizedName, extras: FrozenSet[NormalizedName]) -> str: @@ -115,7 +114,7 @@ def name(self) -> str: raise NotImplementedError("Override in subclass") @property - def version(self) -> CandidateVersion: + def version(self) -> Version: raise NotImplementedError("Override in subclass") @property diff --git a/src/pip/_internal/resolution/resolvelib/candidates.py b/src/pip/_internal/resolution/resolvelib/candidates.py index 019ff60a0a5..029b26a3368 100644 --- a/src/pip/_internal/resolution/resolvelib/candidates.py +++ b/src/pip/_internal/resolution/resolvelib/candidates.py @@ -21,7 +21,7 @@ from pip._internal.utils.direct_url_helpers import direct_url_from_link from pip._internal.utils.misc import normalize_version_info -from .base import Candidate, CandidateVersion, Requirement, format_name +from .base import Candidate, Requirement, format_name if TYPE_CHECKING: from .factory import Factory @@ -145,7 +145,7 @@ def __init__( ireq: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: self._link = link self._source_link = source_link @@ -190,7 +190,7 @@ def name(self) -> str: return self.project_name @property - def version(self) -> CandidateVersion: + def version(self) -> Version: if self._version is None: self._version = self.dist.version return self._version @@ -257,7 +257,7 @@ def __init__( template: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: source_link = link cache_entry = factory.get_wheel_cache_entry(source_link, name) @@ -314,7 +314,7 @@ def __init__( template: InstallRequirement, factory: "Factory", name: Optional[NormalizedName] = None, - version: Optional[CandidateVersion] = None, + version: Optional[Version] = None, ) -> None: super().__init__( link=link, @@ -374,7 +374,7 @@ def name(self) -> str: return self.project_name @property - def version(self) -> CandidateVersion: + def version(self) -> Version: if self._version is None: self._version = self.dist.version return self._version @@ -473,7 +473,7 @@ def name(self) -> str: return format_name(self.base.project_name, self.extras) @property - def version(self) -> CandidateVersion: + def version(self) -> Version: return self.base.version def format_for_error(self) -> str: @@ -588,7 +588,7 @@ def name(self) -> str: return REQUIRES_PYTHON_IDENTIFIER @property - def version(self) -> CandidateVersion: + def version(self) -> Version: return self._version def format_for_error(self) -> str: diff --git a/src/pip/_internal/resolution/resolvelib/factory.py b/src/pip/_internal/resolution/resolvelib/factory.py index e36df15d459..6de5d698e16 100644 --- a/src/pip/_internal/resolution/resolvelib/factory.py +++ b/src/pip/_internal/resolution/resolvelib/factory.py @@ -23,6 +23,7 @@ from pip._vendor.packaging.requirements import InvalidRequirement from pip._vendor.packaging.specifiers import SpecifierSet from pip._vendor.packaging.utils import NormalizedName, canonicalize_name +from pip._vendor.packaging.version import Version from pip._vendor.resolvelib import ResolutionImpossible from pip._internal.cache import CacheEntry, WheelCache @@ -52,7 +53,7 @@ from pip._internal.utils.packaging import get_requirement from pip._internal.utils.virtualenv import running_under_virtualenv -from .base import Candidate, CandidateVersion, Constraint, Requirement +from .base import Candidate, Constraint, Requirement from .candidates import ( AlreadyInstalledCandidate, BaseCandidate, @@ -178,7 +179,7 @@ def _make_candidate_from_link( extras: FrozenSet[str], template: InstallRequirement, name: Optional[NormalizedName], - version: Optional[CandidateVersion], + version: Optional[Version], ) -> Optional[Candidate]: base: Optional[BaseCandidate] = self._make_base_candidate_from_link( link, template, name, version @@ -192,7 +193,7 @@ def _make_base_candidate_from_link( link: Link, template: InstallRequirement, name: Optional[NormalizedName], - version: Optional[CandidateVersion], + version: Optional[Version], ) -> Optional[BaseCandidate]: # TODO: Check already installed candidate, and use it if the link and # editable flag match. @@ -670,8 +671,8 @@ def _report_single_requirement_conflict( cands = self._finder.find_all_candidates(req.project_name) skipped_by_requires_python = self._finder.requires_python_skipped_reasons() - versions_set: Set[CandidateVersion] = set() - yanked_versions_set: Set[CandidateVersion] = set() + versions_set: Set[Version] = set() + yanked_versions_set: Set[Version] = set() for c in cands: is_yanked = c.link.is_yanked if c.link else False if is_yanked: diff --git a/src/pip/_internal/self_outdated_check.py b/src/pip/_internal/self_outdated_check.py index 0f64ae0e614..2185f2fb108 100644 --- a/src/pip/_internal/self_outdated_check.py +++ b/src/pip/_internal/self_outdated_check.py @@ -9,6 +9,7 @@ from dataclasses import dataclass from typing import Any, Callable, Dict, Optional +from pip._vendor.packaging.version import Version from pip._vendor.packaging.version import parse as parse_version from pip._vendor.rich.console import Group from pip._vendor.rich.markup import escape @@ -17,7 +18,6 @@ from pip._internal.index.collector import LinkCollector from pip._internal.index.package_finder import PackageFinder from pip._internal.metadata import get_default_environment -from pip._internal.metadata.base import DistributionVersion from pip._internal.models.selection_prefs import SelectionPreferences from pip._internal.network.session import PipSession from pip._internal.utils.compat import WINDOWS @@ -191,7 +191,7 @@ def _self_version_check_logic( *, state: SelfCheckState, current_time: datetime.datetime, - local_version: DistributionVersion, + local_version: Version, get_remote_version: Callable[[], Optional[str]], ) -> Optional[UpgradePrompt]: remote_version_str = state.get(current_time) diff --git a/src/pip/_vendor/packaging/__about__.py b/src/pip/_vendor/packaging/__about__.py deleted file mode 100644 index 3551bc2d298..00000000000 --- a/src/pip/_vendor/packaging/__about__.py +++ /dev/null @@ -1,26 +0,0 @@ -# This file is dual licensed under the terms of the Apache License, Version -# 2.0, and the BSD License. See the LICENSE file in the root of this repository -# for complete details. - -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] - -__title__ = "packaging" -__summary__ = "Core utilities for Python packages" -__uri__ = "https://github.com/pypa/packaging" - -__version__ = "21.3" - -__author__ = "Donald Stufft and individual contributors" -__email__ = "donald@stufft.io" - -__license__ = "BSD-2-Clause or Apache-2.0" -__copyright__ = "2014-2019 %s" % __author__ diff --git a/src/pip/_vendor/packaging/__init__.py b/src/pip/_vendor/packaging/__init__.py index 3c50c5dcfee..22809cfd5dc 100644 --- a/src/pip/_vendor/packaging/__init__.py +++ b/src/pip/_vendor/packaging/__init__.py @@ -2,24 +2,14 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -from .__about__ import ( - __author__, - __copyright__, - __email__, - __license__, - __summary__, - __title__, - __uri__, - __version__, -) +__title__ = "packaging" +__summary__ = "Core utilities for Python packages" +__uri__ = "https://github.com/pypa/packaging" -__all__ = [ - "__title__", - "__summary__", - "__uri__", - "__version__", - "__author__", - "__email__", - "__license__", - "__copyright__", -] +__version__ = "23.2" + +__author__ = "Donald Stufft and individual contributors" +__email__ = "donald@stufft.io" + +__license__ = "BSD-2-Clause or Apache-2.0" +__copyright__ = "2014 %s" % __author__ diff --git a/src/pip/_vendor/packaging/_elffile.py b/src/pip/_vendor/packaging/_elffile.py new file mode 100644 index 00000000000..6fb19b30bb5 --- /dev/null +++ b/src/pip/_vendor/packaging/_elffile.py @@ -0,0 +1,108 @@ +""" +ELF file parser. + +This provides a class ``ELFFile`` that parses an ELF executable in a similar +interface to ``ZipFile``. Only the read interface is implemented. + +Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca +ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html +""" + +import enum +import os +import struct +from typing import IO, Optional, Tuple + + +class ELFInvalid(ValueError): + pass + + +class EIClass(enum.IntEnum): + C32 = 1 + C64 = 2 + + +class EIData(enum.IntEnum): + Lsb = 1 + Msb = 2 + + +class EMachine(enum.IntEnum): + I386 = 3 + S390 = 22 + Arm = 40 + X8664 = 62 + AArc64 = 183 + + +class ELFFile: + """ + Representation of an ELF executable. + """ + + def __init__(self, f: IO[bytes]) -> None: + self._f = f + + try: + ident = self._read("16B") + except struct.error: + raise ELFInvalid("unable to parse identification") + magic = bytes(ident[:4]) + if magic != b"\x7fELF": + raise ELFInvalid(f"invalid magic: {magic!r}") + + self.capacity = ident[4] # Format for program header (bitness). + self.encoding = ident[5] # Data structure encoding (endianness). + + try: + # e_fmt: Format for program header. + # p_fmt: Format for section header. + # p_idx: Indexes to find p_type, p_offset, and p_filesz. + e_fmt, self._p_fmt, self._p_idx = { + (1, 1): ("HHIIIIIHHH", ">IIIIIIII", (0, 1, 4)), # 32-bit MSB. + (2, 1): ("HHIQQQIHHH", ">IIQQQQQQ", (0, 2, 5)), # 64-bit MSB. + }[(self.capacity, self.encoding)] + except KeyError: + raise ELFInvalid( + f"unrecognized capacity ({self.capacity}) or " + f"encoding ({self.encoding})" + ) + + try: + ( + _, + self.machine, # Architecture type. + _, + _, + self._e_phoff, # Offset of program header. + _, + self.flags, # Processor-specific flags. + _, + self._e_phentsize, # Size of section. + self._e_phnum, # Number of sections. + ) = self._read(e_fmt) + except struct.error as e: + raise ELFInvalid("unable to parse machine and section information") from e + + def _read(self, fmt: str) -> Tuple[int, ...]: + return struct.unpack(fmt, self._f.read(struct.calcsize(fmt))) + + @property + def interpreter(self) -> Optional[str]: + """ + The path recorded in the ``PT_INTERP`` section header. + """ + for index in range(self._e_phnum): + self._f.seek(self._e_phoff + self._e_phentsize * index) + try: + data = self._read(self._p_fmt) + except struct.error: + continue + if data[self._p_idx[0]] != 3: # Not PT_INTERP. + continue + self._f.seek(data[self._p_idx[1]]) + return os.fsdecode(self._f.read(data[self._p_idx[2]])).strip("\0") + return None diff --git a/src/pip/_vendor/packaging/_manylinux.py b/src/pip/_vendor/packaging/_manylinux.py index 4c379aa6f69..3705d50db91 100644 --- a/src/pip/_vendor/packaging/_manylinux.py +++ b/src/pip/_vendor/packaging/_manylinux.py @@ -1,122 +1,62 @@ import collections +import contextlib import functools import os import re -import struct import sys import warnings -from typing import IO, Dict, Iterator, NamedTuple, Optional, Tuple - - -# Python does not provide platform information at sufficient granularity to -# identify the architecture of the running executable in some cases, so we -# determine it dynamically by reading the information from the running -# process. This only applies on Linux, which uses the ELF format. -class _ELFFileHeader: - # https://en.wikipedia.org/wiki/Executable_and_Linkable_Format#File_header - class _InvalidELFFileHeader(ValueError): - """ - An invalid ELF file header was found. - """ - - ELF_MAGIC_NUMBER = 0x7F454C46 - ELFCLASS32 = 1 - ELFCLASS64 = 2 - ELFDATA2LSB = 1 - ELFDATA2MSB = 2 - EM_386 = 3 - EM_S390 = 22 - EM_ARM = 40 - EM_X86_64 = 62 - EF_ARM_ABIMASK = 0xFF000000 - EF_ARM_ABI_VER5 = 0x05000000 - EF_ARM_ABI_FLOAT_HARD = 0x00000400 - - def __init__(self, file: IO[bytes]) -> None: - def unpack(fmt: str) -> int: - try: - data = file.read(struct.calcsize(fmt)) - result: Tuple[int, ...] = struct.unpack(fmt, data) - except struct.error: - raise _ELFFileHeader._InvalidELFFileHeader() - return result[0] - - self.e_ident_magic = unpack(">I") - if self.e_ident_magic != self.ELF_MAGIC_NUMBER: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_class = unpack("B") - if self.e_ident_class not in {self.ELFCLASS32, self.ELFCLASS64}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_data = unpack("B") - if self.e_ident_data not in {self.ELFDATA2LSB, self.ELFDATA2MSB}: - raise _ELFFileHeader._InvalidELFFileHeader() - self.e_ident_version = unpack("B") - self.e_ident_osabi = unpack("B") - self.e_ident_abiversion = unpack("B") - self.e_ident_pad = file.read(7) - format_h = "H" - format_i = "I" - format_q = "Q" - format_p = format_i if self.e_ident_class == self.ELFCLASS32 else format_q - self.e_type = unpack(format_h) - self.e_machine = unpack(format_h) - self.e_version = unpack(format_i) - self.e_entry = unpack(format_p) - self.e_phoff = unpack(format_p) - self.e_shoff = unpack(format_p) - self.e_flags = unpack(format_i) - self.e_ehsize = unpack(format_h) - self.e_phentsize = unpack(format_h) - self.e_phnum = unpack(format_h) - self.e_shentsize = unpack(format_h) - self.e_shnum = unpack(format_h) - self.e_shstrndx = unpack(format_h) - - -def _get_elf_header() -> Optional[_ELFFileHeader]: +from typing import Dict, Generator, Iterator, NamedTuple, Optional, Sequence, Tuple + +from ._elffile import EIClass, EIData, ELFFile, EMachine + +EF_ARM_ABIMASK = 0xFF000000 +EF_ARM_ABI_VER5 = 0x05000000 +EF_ARM_ABI_FLOAT_HARD = 0x00000400 + + +# `os.PathLike` not a generic type until Python 3.9, so sticking with `str` +# as the type for `path` until then. +@contextlib.contextmanager +def _parse_elf(path: str) -> Generator[Optional[ELFFile], None, None]: try: - with open(sys.executable, "rb") as f: - elf_header = _ELFFileHeader(f) - except (OSError, TypeError, _ELFFileHeader._InvalidELFFileHeader): - return None - return elf_header + with open(path, "rb") as f: + yield ELFFile(f) + except (OSError, TypeError, ValueError): + yield None -def _is_linux_armhf() -> bool: +def _is_linux_armhf(executable: str) -> bool: # hard-float ABI can be detected from the ELF header of the running # process # https://static.docs.arm.com/ihi0044/g/aaelf32.pdf - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_ARM - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABIMASK - ) == elf_header.EF_ARM_ABI_VER5 - result &= ( - elf_header.e_flags & elf_header.EF_ARM_ABI_FLOAT_HARD - ) == elf_header.EF_ARM_ABI_FLOAT_HARD - return result - - -def _is_linux_i686() -> bool: - elf_header = _get_elf_header() - if elf_header is None: - return False - result = elf_header.e_ident_class == elf_header.ELFCLASS32 - result &= elf_header.e_ident_data == elf_header.ELFDATA2LSB - result &= elf_header.e_machine == elf_header.EM_386 - return result + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.Arm + and f.flags & EF_ARM_ABIMASK == EF_ARM_ABI_VER5 + and f.flags & EF_ARM_ABI_FLOAT_HARD == EF_ARM_ABI_FLOAT_HARD + ) + + +def _is_linux_i686(executable: str) -> bool: + with _parse_elf(executable) as f: + return ( + f is not None + and f.capacity == EIClass.C32 + and f.encoding == EIData.Lsb + and f.machine == EMachine.I386 + ) -def _have_compatible_abi(arch: str) -> bool: - if arch == "armv7l": - return _is_linux_armhf() - if arch == "i686": - return _is_linux_i686() - return arch in {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x"} +def _have_compatible_abi(executable: str, archs: Sequence[str]) -> bool: + if "armv7l" in archs: + return _is_linux_armhf(executable) + if "i686" in archs: + return _is_linux_i686(executable) + allowed_archs = {"x86_64", "aarch64", "ppc64", "ppc64le", "s390x", "loongarch64"} + return any(arch in allowed_archs for arch in archs) # If glibc ever changes its major version, we need to know what the last @@ -141,10 +81,10 @@ def _glibc_version_string_confstr() -> Optional[str]: # platform module. # https://github.com/python/cpython/blob/fcf1d003bf4f0100c/Lib/platform.py#L175-L183 try: - # os.confstr("CS_GNU_LIBC_VERSION") returns a string like "glibc 2.17". - version_string = os.confstr("CS_GNU_LIBC_VERSION") + # Should be a string like "glibc 2.17". + version_string: str = getattr(os, "confstr")("CS_GNU_LIBC_VERSION") assert version_string is not None - _, version = version_string.split() + _, version = version_string.rsplit() except (AssertionError, AttributeError, OSError, ValueError): # os.confstr() or CS_GNU_LIBC_VERSION not available (or a bad value)... return None @@ -211,8 +151,8 @@ def _parse_glibc_version(version_str: str) -> Tuple[int, int]: m = re.match(r"(?P[0-9]+)\.(?P[0-9]+)", version_str) if not m: warnings.warn( - "Expected glibc version with 2 components major.minor," - " got: %s" % version_str, + f"Expected glibc version with 2 components major.minor," + f" got: {version_str}", RuntimeWarning, ) return -1, -1 @@ -228,7 +168,7 @@ def _get_glibc_version() -> Tuple[int, int]: # From PEP 513, PEP 600 -def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: +def _is_compatible(arch: str, version: _GLibCVersion) -> bool: sys_glibc = _get_glibc_version() if sys_glibc < version: return False @@ -264,12 +204,22 @@ def _is_compatible(name: str, arch: str, version: _GLibCVersion) -> bool: } -def platform_tags(linux: str, arch: str) -> Iterator[str]: - if not _have_compatible_abi(arch): +def platform_tags(archs: Sequence[str]) -> Iterator[str]: + """Generate manylinux tags compatible to the current platform. + + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be manylinux-compatible. + + :returns: An iterator of compatible manylinux tags. + """ + if not _have_compatible_abi(sys.executable, archs): return # Oldest glibc to be supported regardless of architecture is (2, 17). too_old_glibc2 = _GLibCVersion(2, 16) - if arch in {"x86_64", "i686"}: + if set(archs) & {"x86_64", "i686"}: # On x86/i686 also oldest glibc to be supported is (2, 5). too_old_glibc2 = _GLibCVersion(2, 4) current_glibc = _GLibCVersion(*_get_glibc_version()) @@ -283,19 +233,20 @@ def platform_tags(linux: str, arch: str) -> Iterator[str]: for glibc_major in range(current_glibc.major - 1, 1, -1): glibc_minor = _LAST_GLIBC_MINOR[glibc_major] glibc_max_list.append(_GLibCVersion(glibc_major, glibc_minor)) - for glibc_max in glibc_max_list: - if glibc_max.major == too_old_glibc2.major: - min_minor = too_old_glibc2.minor - else: - # For other glibc major versions oldest supported is (x, 0). - min_minor = -1 - for glibc_minor in range(glibc_max.minor, min_minor, -1): - glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) - tag = "manylinux_{}_{}".format(*glibc_version) - if _is_compatible(tag, arch, glibc_version): - yield linux.replace("linux", tag) - # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. - if glibc_version in _LEGACY_MANYLINUX_MAP: - legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] - if _is_compatible(legacy_tag, arch, glibc_version): - yield linux.replace("linux", legacy_tag) + for arch in archs: + for glibc_max in glibc_max_list: + if glibc_max.major == too_old_glibc2.major: + min_minor = too_old_glibc2.minor + else: + # For other glibc major versions oldest supported is (x, 0). + min_minor = -1 + for glibc_minor in range(glibc_max.minor, min_minor, -1): + glibc_version = _GLibCVersion(glibc_max.major, glibc_minor) + tag = "manylinux_{}_{}".format(*glibc_version) + if _is_compatible(arch, glibc_version): + yield f"{tag}_{arch}" + # Handle the legacy manylinux1, manylinux2010, manylinux2014 tags. + if glibc_version in _LEGACY_MANYLINUX_MAP: + legacy_tag = _LEGACY_MANYLINUX_MAP[glibc_version] + if _is_compatible(arch, glibc_version): + yield f"{legacy_tag}_{arch}" diff --git a/src/pip/_vendor/packaging/_musllinux.py b/src/pip/_vendor/packaging/_musllinux.py index 8ac3059ba3c..86419df9d70 100644 --- a/src/pip/_vendor/packaging/_musllinux.py +++ b/src/pip/_vendor/packaging/_musllinux.py @@ -4,68 +4,13 @@ linked against musl, and what musl version is used. """ -import contextlib import functools -import operator -import os import re -import struct import subprocess import sys -from typing import IO, Iterator, NamedTuple, Optional, Tuple +from typing import Iterator, NamedTuple, Optional, Sequence - -def _read_unpacked(f: IO[bytes], fmt: str) -> Tuple[int, ...]: - return struct.unpack(fmt, f.read(struct.calcsize(fmt))) - - -def _parse_ld_musl_from_elf(f: IO[bytes]) -> Optional[str]: - """Detect musl libc location by parsing the Python executable. - - Based on: https://gist.github.com/lyssdod/f51579ae8d93c8657a5564aefc2ffbca - ELF header: https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html - """ - f.seek(0) - try: - ident = _read_unpacked(f, "16B") - except struct.error: - return None - if ident[:4] != tuple(b"\x7fELF"): # Invalid magic, not ELF. - return None - f.seek(struct.calcsize("HHI"), 1) # Skip file type, machine, and version. - - try: - # e_fmt: Format for program header. - # p_fmt: Format for section header. - # p_idx: Indexes to find p_type, p_offset, and p_filesz. - e_fmt, p_fmt, p_idx = { - 1: ("IIIIHHH", "IIIIIIII", (0, 1, 4)), # 32-bit. - 2: ("QQQIHHH", "IIQQQQQQ", (0, 2, 5)), # 64-bit. - }[ident[4]] - except KeyError: - return None - else: - p_get = operator.itemgetter(*p_idx) - - # Find the interpreter section and return its content. - try: - _, e_phoff, _, _, _, e_phentsize, e_phnum = _read_unpacked(f, e_fmt) - except struct.error: - return None - for i in range(e_phnum + 1): - f.seek(e_phoff + e_phentsize * i) - try: - p_type, p_offset, p_filesz = p_get(_read_unpacked(f, p_fmt)) - except struct.error: - return None - if p_type != 3: # Not PT_INTERP. - continue - f.seek(p_offset) - interpreter = os.fsdecode(f.read(p_filesz)).strip("\0") - if "musl" not in interpreter: - return None - return interpreter - return None +from ._elffile import ELFFile class _MuslVersion(NamedTuple): @@ -95,32 +40,34 @@ def _get_musl_version(executable: str) -> Optional[_MuslVersion]: Version 1.2.2 Dynamic Program Loader """ - with contextlib.ExitStack() as stack: - try: - f = stack.enter_context(open(executable, "rb")) - except OSError: - return None - ld = _parse_ld_musl_from_elf(f) - if not ld: + try: + with open(executable, "rb") as f: + ld = ELFFile(f).interpreter + except (OSError, TypeError, ValueError): + return None + if ld is None or "musl" not in ld: return None - proc = subprocess.run([ld], stderr=subprocess.PIPE, universal_newlines=True) + proc = subprocess.run([ld], stderr=subprocess.PIPE, text=True) return _parse_musl_version(proc.stderr) -def platform_tags(arch: str) -> Iterator[str]: +def platform_tags(archs: Sequence[str]) -> Iterator[str]: """Generate musllinux tags compatible to the current platform. - :param arch: Should be the part of platform tag after the ``linux_`` - prefix, e.g. ``x86_64``. The ``linux_`` prefix is assumed as a - prerequisite for the current platform to be musllinux-compatible. + :param archs: Sequence of compatible architectures. + The first one shall be the closest to the actual architecture and be the part of + platform tag after the ``linux_`` prefix, e.g. ``x86_64``. + The ``linux_`` prefix is assumed as a prerequisite for the current platform to + be musllinux-compatible. :returns: An iterator of compatible musllinux tags. """ sys_musl = _get_musl_version(sys.executable) if sys_musl is None: # Python not dynamically linked against musl. return - for minor in range(sys_musl.minor, -1, -1): - yield f"musllinux_{sys_musl.major}_{minor}_{arch}" + for arch in archs: + for minor in range(sys_musl.minor, -1, -1): + yield f"musllinux_{sys_musl.major}_{minor}_{arch}" if __name__ == "__main__": # pragma: no cover diff --git a/src/pip/_vendor/packaging/_parser.py b/src/pip/_vendor/packaging/_parser.py new file mode 100644 index 00000000000..4576981c2dd --- /dev/null +++ b/src/pip/_vendor/packaging/_parser.py @@ -0,0 +1,359 @@ +"""Handwritten parser of dependency specifiers. + +The docstring for each __parse_* function contains ENBF-inspired grammar representing +the implementation. +""" + +import ast +from typing import Any, List, NamedTuple, Optional, Tuple, Union + +from ._tokenizer import DEFAULT_RULES, Tokenizer + + +class Node: + def __init__(self, value: str) -> None: + self.value = value + + def __str__(self) -> str: + return self.value + + def __repr__(self) -> str: + return f"<{self.__class__.__name__}('{self}')>" + + def serialize(self) -> str: + raise NotImplementedError + + +class Variable(Node): + def serialize(self) -> str: + return str(self) + + +class Value(Node): + def serialize(self) -> str: + return f'"{self}"' + + +class Op(Node): + def serialize(self) -> str: + return str(self) + + +MarkerVar = Union[Variable, Value] +MarkerItem = Tuple[MarkerVar, Op, MarkerVar] +# MarkerAtom = Union[MarkerItem, List["MarkerAtom"]] +# MarkerList = List[Union["MarkerList", MarkerAtom, str]] +# mypy does not support recursive type definition +# https://github.com/python/mypy/issues/731 +MarkerAtom = Any +MarkerList = List[Any] + + +class ParsedRequirement(NamedTuple): + name: str + url: str + extras: List[str] + specifier: str + marker: Optional[MarkerList] + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for dependency specifier +# -------------------------------------------------------------------------------------- +def parse_requirement(source: str) -> ParsedRequirement: + return _parse_requirement(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_requirement(tokenizer: Tokenizer) -> ParsedRequirement: + """ + requirement = WS? IDENTIFIER WS? extras WS? requirement_details + """ + tokenizer.consume("WS") + + name_token = tokenizer.expect( + "IDENTIFIER", expected="package name at the start of dependency specifier" + ) + name = name_token.text + tokenizer.consume("WS") + + extras = _parse_extras(tokenizer) + tokenizer.consume("WS") + + url, specifier, marker = _parse_requirement_details(tokenizer) + tokenizer.expect("END", expected="end of dependency specifier") + + return ParsedRequirement(name, url, extras, specifier, marker) + + +def _parse_requirement_details( + tokenizer: Tokenizer, +) -> Tuple[str, str, Optional[MarkerList]]: + """ + requirement_details = AT URL (WS requirement_marker?)? + | specifier WS? (requirement_marker)? + """ + + specifier = "" + url = "" + marker = None + + if tokenizer.check("AT"): + tokenizer.read() + tokenizer.consume("WS") + + url_start = tokenizer.position + url = tokenizer.expect("URL", expected="URL after @").text + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + tokenizer.expect("WS", expected="whitespace after URL") + + # The input might end after whitespace. + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, span_start=url_start, after="URL and whitespace" + ) + else: + specifier_start = tokenizer.position + specifier = _parse_specifier(tokenizer) + tokenizer.consume("WS") + + if tokenizer.check("END", peek=True): + return (url, specifier, marker) + + marker = _parse_requirement_marker( + tokenizer, + span_start=specifier_start, + after=( + "version specifier" + if specifier + else "name and no valid version specifier" + ), + ) + + return (url, specifier, marker) + + +def _parse_requirement_marker( + tokenizer: Tokenizer, *, span_start: int, after: str +) -> MarkerList: + """ + requirement_marker = SEMICOLON marker WS? + """ + + if not tokenizer.check("SEMICOLON"): + tokenizer.raise_syntax_error( + f"Expected end or semicolon (after {after})", + span_start=span_start, + ) + tokenizer.read() + + marker = _parse_marker(tokenizer) + tokenizer.consume("WS") + + return marker + + +def _parse_extras(tokenizer: Tokenizer) -> List[str]: + """ + extras = (LEFT_BRACKET wsp* extras_list? wsp* RIGHT_BRACKET)? + """ + if not tokenizer.check("LEFT_BRACKET", peek=True): + return [] + + with tokenizer.enclosing_tokens( + "LEFT_BRACKET", + "RIGHT_BRACKET", + around="extras", + ): + tokenizer.consume("WS") + extras = _parse_extras_list(tokenizer) + tokenizer.consume("WS") + + return extras + + +def _parse_extras_list(tokenizer: Tokenizer) -> List[str]: + """ + extras_list = identifier (wsp* ',' wsp* identifier)* + """ + extras: List[str] = [] + + if not tokenizer.check("IDENTIFIER"): + return extras + + extras.append(tokenizer.read().text) + + while True: + tokenizer.consume("WS") + if tokenizer.check("IDENTIFIER", peek=True): + tokenizer.raise_syntax_error("Expected comma between extra names") + elif not tokenizer.check("COMMA"): + break + + tokenizer.read() + tokenizer.consume("WS") + + extra_token = tokenizer.expect("IDENTIFIER", expected="extra name after comma") + extras.append(extra_token.text) + + return extras + + +def _parse_specifier(tokenizer: Tokenizer) -> str: + """ + specifier = LEFT_PARENTHESIS WS? version_many WS? RIGHT_PARENTHESIS + | WS? version_many WS? + """ + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="version specifier", + ): + tokenizer.consume("WS") + parsed_specifiers = _parse_version_many(tokenizer) + tokenizer.consume("WS") + + return parsed_specifiers + + +def _parse_version_many(tokenizer: Tokenizer) -> str: + """ + version_many = (SPECIFIER (WS? COMMA WS? SPECIFIER)*)? + """ + parsed_specifiers = "" + while tokenizer.check("SPECIFIER"): + span_start = tokenizer.position + parsed_specifiers += tokenizer.read().text + if tokenizer.check("VERSION_PREFIX_TRAIL", peek=True): + tokenizer.raise_syntax_error( + ".* suffix can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position + 1, + ) + if tokenizer.check("VERSION_LOCAL_LABEL_TRAIL", peek=True): + tokenizer.raise_syntax_error( + "Local version label can only be used with `==` or `!=` operators", + span_start=span_start, + span_end=tokenizer.position, + ) + tokenizer.consume("WS") + if not tokenizer.check("COMMA"): + break + parsed_specifiers += tokenizer.read().text + tokenizer.consume("WS") + + return parsed_specifiers + + +# -------------------------------------------------------------------------------------- +# Recursive descent parser for marker expression +# -------------------------------------------------------------------------------------- +def parse_marker(source: str) -> MarkerList: + return _parse_full_marker(Tokenizer(source, rules=DEFAULT_RULES)) + + +def _parse_full_marker(tokenizer: Tokenizer) -> MarkerList: + retval = _parse_marker(tokenizer) + tokenizer.expect("END", expected="end of marker expression") + return retval + + +def _parse_marker(tokenizer: Tokenizer) -> MarkerList: + """ + marker = marker_atom (BOOLOP marker_atom)+ + """ + expression = [_parse_marker_atom(tokenizer)] + while tokenizer.check("BOOLOP"): + token = tokenizer.read() + expr_right = _parse_marker_atom(tokenizer) + expression.extend((token.text, expr_right)) + return expression + + +def _parse_marker_atom(tokenizer: Tokenizer) -> MarkerAtom: + """ + marker_atom = WS? LEFT_PARENTHESIS WS? marker WS? RIGHT_PARENTHESIS WS? + | WS? marker_item WS? + """ + + tokenizer.consume("WS") + if tokenizer.check("LEFT_PARENTHESIS", peek=True): + with tokenizer.enclosing_tokens( + "LEFT_PARENTHESIS", + "RIGHT_PARENTHESIS", + around="marker expression", + ): + tokenizer.consume("WS") + marker: MarkerAtom = _parse_marker(tokenizer) + tokenizer.consume("WS") + else: + marker = _parse_marker_item(tokenizer) + tokenizer.consume("WS") + return marker + + +def _parse_marker_item(tokenizer: Tokenizer) -> MarkerItem: + """ + marker_item = WS? marker_var WS? marker_op WS? marker_var WS? + """ + tokenizer.consume("WS") + marker_var_left = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + marker_op = _parse_marker_op(tokenizer) + tokenizer.consume("WS") + marker_var_right = _parse_marker_var(tokenizer) + tokenizer.consume("WS") + return (marker_var_left, marker_op, marker_var_right) + + +def _parse_marker_var(tokenizer: Tokenizer) -> MarkerVar: + """ + marker_var = VARIABLE | QUOTED_STRING + """ + if tokenizer.check("VARIABLE"): + return process_env_var(tokenizer.read().text.replace(".", "_")) + elif tokenizer.check("QUOTED_STRING"): + return process_python_str(tokenizer.read().text) + else: + tokenizer.raise_syntax_error( + message="Expected a marker variable or quoted string" + ) + + +def process_env_var(env_var: str) -> Variable: + if ( + env_var == "platform_python_implementation" + or env_var == "python_implementation" + ): + return Variable("platform_python_implementation") + else: + return Variable(env_var) + + +def process_python_str(python_str: str) -> Value: + value = ast.literal_eval(python_str) + return Value(str(value)) + + +def _parse_marker_op(tokenizer: Tokenizer) -> Op: + """ + marker_op = IN | NOT IN | OP + """ + if tokenizer.check("IN"): + tokenizer.read() + return Op("in") + elif tokenizer.check("NOT"): + tokenizer.read() + tokenizer.expect("WS", expected="whitespace after 'not'") + tokenizer.expect("IN", expected="'in' after 'not'") + return Op("not in") + elif tokenizer.check("OP"): + return Op(tokenizer.read().text) + else: + return tokenizer.raise_syntax_error( + "Expected marker operator, one of " + "<=, <, !=, ==, >=, >, ~=, ===, in, not in" + ) diff --git a/src/pip/_vendor/packaging/_tokenizer.py b/src/pip/_vendor/packaging/_tokenizer.py new file mode 100644 index 00000000000..dd0d648d49a --- /dev/null +++ b/src/pip/_vendor/packaging/_tokenizer.py @@ -0,0 +1,192 @@ +import contextlib +import re +from dataclasses import dataclass +from typing import Dict, Iterator, NoReturn, Optional, Tuple, Union + +from .specifiers import Specifier + + +@dataclass +class Token: + name: str + text: str + position: int + + +class ParserSyntaxError(Exception): + """The provided source text could not be parsed correctly.""" + + def __init__( + self, + message: str, + *, + source: str, + span: Tuple[int, int], + ) -> None: + self.span = span + self.message = message + self.source = source + + super().__init__() + + def __str__(self) -> str: + marker = " " * self.span[0] + "~" * (self.span[1] - self.span[0]) + "^" + return "\n ".join([self.message, self.source, marker]) + + +DEFAULT_RULES: "Dict[str, Union[str, re.Pattern[str]]]" = { + "LEFT_PARENTHESIS": r"\(", + "RIGHT_PARENTHESIS": r"\)", + "LEFT_BRACKET": r"\[", + "RIGHT_BRACKET": r"\]", + "SEMICOLON": r";", + "COMMA": r",", + "QUOTED_STRING": re.compile( + r""" + ( + ('[^']*') + | + ("[^"]*") + ) + """, + re.VERBOSE, + ), + "OP": r"(===|==|~=|!=|<=|>=|<|>)", + "BOOLOP": r"\b(or|and)\b", + "IN": r"\bin\b", + "NOT": r"\bnot\b", + "VARIABLE": re.compile( + r""" + \b( + python_version + |python_full_version + |os[._]name + |sys[._]platform + |platform_(release|system) + |platform[._](version|machine|python_implementation) + |python_implementation + |implementation_(name|version) + |extra + )\b + """, + re.VERBOSE, + ), + "SPECIFIER": re.compile( + Specifier._operator_regex_str + Specifier._version_regex_str, + re.VERBOSE | re.IGNORECASE, + ), + "AT": r"\@", + "URL": r"[^ \t]+", + "IDENTIFIER": r"\b[a-zA-Z0-9][a-zA-Z0-9._-]*\b", + "VERSION_PREFIX_TRAIL": r"\.\*", + "VERSION_LOCAL_LABEL_TRAIL": r"\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*", + "WS": r"[ \t]+", + "END": r"$", +} + + +class Tokenizer: + """Context-sensitive token parsing. + + Provides methods to examine the input stream to check whether the next token + matches. + """ + + def __init__( + self, + source: str, + *, + rules: "Dict[str, Union[str, re.Pattern[str]]]", + ) -> None: + self.source = source + self.rules: Dict[str, re.Pattern[str]] = { + name: re.compile(pattern) for name, pattern in rules.items() + } + self.next_token: Optional[Token] = None + self.position = 0 + + def consume(self, name: str) -> None: + """Move beyond provided token name, if at current position.""" + if self.check(name): + self.read() + + def check(self, name: str, *, peek: bool = False) -> bool: + """Check whether the next token has the provided name. + + By default, if the check succeeds, the token *must* be read before + another check. If `peek` is set to `True`, the token is not loaded and + would need to be checked again. + """ + assert ( + self.next_token is None + ), f"Cannot check for {name!r}, already have {self.next_token!r}" + assert name in self.rules, f"Unknown token name: {name!r}" + + expression = self.rules[name] + + match = expression.match(self.source, self.position) + if match is None: + return False + if not peek: + self.next_token = Token(name, match[0], self.position) + return True + + def expect(self, name: str, *, expected: str) -> Token: + """Expect a certain token name next, failing with a syntax error otherwise. + + The token is *not* read. + """ + if not self.check(name): + raise self.raise_syntax_error(f"Expected {expected}") + return self.read() + + def read(self) -> Token: + """Consume the next token and return it.""" + token = self.next_token + assert token is not None + + self.position += len(token.text) + self.next_token = None + + return token + + def raise_syntax_error( + self, + message: str, + *, + span_start: Optional[int] = None, + span_end: Optional[int] = None, + ) -> NoReturn: + """Raise ParserSyntaxError at the given position.""" + span = ( + self.position if span_start is None else span_start, + self.position if span_end is None else span_end, + ) + raise ParserSyntaxError( + message, + source=self.source, + span=span, + ) + + @contextlib.contextmanager + def enclosing_tokens( + self, open_token: str, close_token: str, *, around: str + ) -> Iterator[None]: + if self.check(open_token): + open_position = self.position + self.read() + else: + open_position = None + + yield + + if open_position is None: + return + + if not self.check(close_token): + self.raise_syntax_error( + f"Expected matching {close_token} for {open_token}, after {around}", + span_start=open_position, + ) + + self.read() diff --git a/src/pip/_vendor/packaging/markers.py b/src/pip/_vendor/packaging/markers.py index 540e7a4dc79..8b98fca7233 100644 --- a/src/pip/_vendor/packaging/markers.py +++ b/src/pip/_vendor/packaging/markers.py @@ -8,19 +8,17 @@ import sys from typing import Any, Callable, Dict, List, Optional, Tuple, Union -from pip._vendor.pyparsing import ( # noqa: N817 - Forward, - Group, - Literal as L, - ParseException, - ParseResults, - QuotedString, - ZeroOrMore, - stringEnd, - stringStart, +from ._parser import ( + MarkerAtom, + MarkerList, + Op, + Value, + Variable, + parse_marker as _parse_marker, ) - +from ._tokenizer import ParserSyntaxError from .specifiers import InvalidSpecifier, Specifier +from .utils import canonicalize_name __all__ = [ "InvalidMarker", @@ -52,101 +50,24 @@ class UndefinedEnvironmentName(ValueError): """ -class Node: - def __init__(self, value: Any) -> None: - self.value = value - - def __str__(self) -> str: - return str(self.value) - - def __repr__(self) -> str: - return f"<{self.__class__.__name__}('{self}')>" - - def serialize(self) -> str: - raise NotImplementedError - - -class Variable(Node): - def serialize(self) -> str: - return str(self) - - -class Value(Node): - def serialize(self) -> str: - return f'"{self}"' - - -class Op(Node): - def serialize(self) -> str: - return str(self) - - -VARIABLE = ( - L("implementation_version") - | L("platform_python_implementation") - | L("implementation_name") - | L("python_full_version") - | L("platform_release") - | L("platform_version") - | L("platform_machine") - | L("platform_system") - | L("python_version") - | L("sys_platform") - | L("os_name") - | L("os.name") # PEP-345 - | L("sys.platform") # PEP-345 - | L("platform.version") # PEP-345 - | L("platform.machine") # PEP-345 - | L("platform.python_implementation") # PEP-345 - | L("python_implementation") # undocumented setuptools legacy - | L("extra") # PEP-508 -) -ALIASES = { - "os.name": "os_name", - "sys.platform": "sys_platform", - "platform.version": "platform_version", - "platform.machine": "platform_machine", - "platform.python_implementation": "platform_python_implementation", - "python_implementation": "platform_python_implementation", -} -VARIABLE.setParseAction(lambda s, l, t: Variable(ALIASES.get(t[0], t[0]))) - -VERSION_CMP = ( - L("===") | L("==") | L(">=") | L("<=") | L("!=") | L("~=") | L(">") | L("<") -) - -MARKER_OP = VERSION_CMP | L("not in") | L("in") -MARKER_OP.setParseAction(lambda s, l, t: Op(t[0])) - -MARKER_VALUE = QuotedString("'") | QuotedString('"') -MARKER_VALUE.setParseAction(lambda s, l, t: Value(t[0])) - -BOOLOP = L("and") | L("or") - -MARKER_VAR = VARIABLE | MARKER_VALUE - -MARKER_ITEM = Group(MARKER_VAR + MARKER_OP + MARKER_VAR) -MARKER_ITEM.setParseAction(lambda s, l, t: tuple(t[0])) - -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() - -MARKER_EXPR = Forward() -MARKER_ATOM = MARKER_ITEM | Group(LPAREN + MARKER_EXPR + RPAREN) -MARKER_EXPR << MARKER_ATOM + ZeroOrMore(BOOLOP + MARKER_EXPR) - -MARKER = stringStart + MARKER_EXPR + stringEnd - - -def _coerce_parse_result(results: Union[ParseResults, List[Any]]) -> List[Any]: - if isinstance(results, ParseResults): - return [_coerce_parse_result(i) for i in results] - else: - return results +def _normalize_extra_values(results: Any) -> Any: + """ + Normalize extra values. + """ + if isinstance(results[0], tuple): + lhs, op, rhs = results[0] + if isinstance(lhs, Variable) and lhs.value == "extra": + normalized_extra = canonicalize_name(rhs.value) + rhs = Value(normalized_extra) + elif isinstance(rhs, Variable) and rhs.value == "extra": + normalized_extra = canonicalize_name(lhs.value) + lhs = Value(normalized_extra) + results[0] = lhs, op, rhs + return results def _format_marker( - marker: Union[List[str], Tuple[Node, ...], str], first: Optional[bool] = True + marker: Union[List[str], MarkerAtom, str], first: Optional[bool] = True ) -> str: assert isinstance(marker, (list, tuple, str)) @@ -192,7 +113,7 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: except InvalidSpecifier: pass else: - return spec.contains(lhs) + return spec.contains(lhs, prereleases=True) oper: Optional[Operator] = _operators.get(op.serialize()) if oper is None: @@ -201,25 +122,19 @@ def _eval_op(lhs: str, op: Op, rhs: str) -> bool: return oper(lhs, rhs) -class Undefined: - pass - +def _normalize(*values: str, key: str) -> Tuple[str, ...]: + # PEP 685 – Comparison of extra names for optional distribution dependencies + # https://peps.python.org/pep-0685/ + # > When comparing extra names, tools MUST normalize the names being + # > compared using the semantics outlined in PEP 503 for names + if key == "extra": + return tuple(canonicalize_name(v) for v in values) -_undefined = Undefined() + # other environment markers don't have such standards + return values -def _get_env(environment: Dict[str, str], name: str) -> str: - value: Union[str, Undefined] = environment.get(name, _undefined) - - if isinstance(value, Undefined): - raise UndefinedEnvironmentName( - f"{name!r} does not exist in evaluation environment." - ) - - return value - - -def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: +def _evaluate_markers(markers: MarkerList, environment: Dict[str, str]) -> bool: groups: List[List[bool]] = [[]] for marker in markers: @@ -231,12 +146,15 @@ def _evaluate_markers(markers: List[Any], environment: Dict[str, str]) -> bool: lhs, op, rhs = marker if isinstance(lhs, Variable): - lhs_value = _get_env(environment, lhs.value) + environment_key = lhs.value + lhs_value = environment[environment_key] rhs_value = rhs.value else: lhs_value = lhs.value - rhs_value = _get_env(environment, rhs.value) + environment_key = rhs.value + rhs_value = environment[environment_key] + lhs_value, rhs_value = _normalize(lhs_value, rhs_value, key=environment_key) groups[-1].append(_eval_op(lhs_value, op, rhs_value)) else: assert marker in ["and", "or"] @@ -274,13 +192,29 @@ def default_environment() -> Dict[str, str]: class Marker: def __init__(self, marker: str) -> None: + # Note: We create a Marker object without calling this constructor in + # packaging.requirements.Requirement. If any additional logic is + # added here, make sure to mirror/adapt Requirement. try: - self._markers = _coerce_parse_result(MARKER.parseString(marker)) - except ParseException as e: - raise InvalidMarker( - f"Invalid marker: {marker!r}, parse error at " - f"{marker[e.loc : e.loc + 8]!r}" - ) + self._markers = _normalize_extra_values(_parse_marker(marker)) + # The attribute `_markers` can be described in terms of a recursive type: + # MarkerList = List[Union[Tuple[Node, ...], str, MarkerList]] + # + # For example, the following expression: + # python_version > "3.6" or (python_version == "3.6" and os_name == "unix") + # + # is parsed into: + # [ + # (, ')>, ), + # 'and', + # [ + # (, , ), + # 'or', + # (, , ) + # ] + # ] + except ParserSyntaxError as e: + raise InvalidMarker(str(e)) from e def __str__(self) -> str: return _format_marker(self._markers) @@ -288,6 +222,15 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"" + def __hash__(self) -> int: + return hash((self.__class__.__name__, str(self))) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Marker): + return NotImplemented + + return str(self) == str(other) + def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: """Evaluate a marker. @@ -298,7 +241,12 @@ def evaluate(self, environment: Optional[Dict[str, str]] = None) -> bool: The environment is determined from the current Python process. """ current_environment = default_environment() + current_environment["extra"] = "" if environment is not None: current_environment.update(environment) + # The API used to allow setting extra to None. We need to handle this + # case for backwards compatibility. + if current_environment["extra"] is None: + current_environment["extra"] = "" return _evaluate_markers(self._markers, current_environment) diff --git a/src/pip/_vendor/packaging/metadata.py b/src/pip/_vendor/packaging/metadata.py new file mode 100644 index 00000000000..87763455ab1 --- /dev/null +++ b/src/pip/_vendor/packaging/metadata.py @@ -0,0 +1,822 @@ +import email.feedparser +import email.header +import email.message +import email.parser +import email.policy +import sys +import typing +from typing import ( + Any, + Callable, + Dict, + Generic, + List, + Optional, + Tuple, + Type, + Union, + cast, +) + +from . import requirements, specifiers, utils, version as version_module + +T = typing.TypeVar("T") +if sys.version_info[:2] >= (3, 8): # pragma: no cover + from typing import Literal, TypedDict +else: # pragma: no cover + if typing.TYPE_CHECKING: + from pip._vendor.typing_extensions import Literal, TypedDict + else: + try: + from pip._vendor.typing_extensions import Literal, TypedDict + except ImportError: + + class Literal: + def __init_subclass__(*_args, **_kwargs): + pass + + class TypedDict: + def __init_subclass__(*_args, **_kwargs): + pass + + +try: + ExceptionGroup = __builtins__.ExceptionGroup # type: ignore[attr-defined] +except AttributeError: + + class ExceptionGroup(Exception): # type: ignore[no-redef] # noqa: N818 + """A minimal implementation of :external:exc:`ExceptionGroup` from Python 3.11. + + If :external:exc:`ExceptionGroup` is already defined by Python itself, + that version is used instead. + """ + + message: str + exceptions: List[Exception] + + def __init__(self, message: str, exceptions: List[Exception]) -> None: + self.message = message + self.exceptions = exceptions + + def __repr__(self) -> str: + return f"{self.__class__.__name__}({self.message!r}, {self.exceptions!r})" + + +class InvalidMetadata(ValueError): + """A metadata field contains invalid data.""" + + field: str + """The name of the field that contains invalid data.""" + + def __init__(self, field: str, message: str) -> None: + self.field = field + super().__init__(message) + + +# The RawMetadata class attempts to make as few assumptions about the underlying +# serialization formats as possible. The idea is that as long as a serialization +# formats offer some very basic primitives in *some* way then we can support +# serializing to and from that format. +class RawMetadata(TypedDict, total=False): + """A dictionary of raw core metadata. + + Each field in core metadata maps to a key of this dictionary (when data is + provided). The key is lower-case and underscores are used instead of dashes + compared to the equivalent core metadata field. Any core metadata field that + can be specified multiple times or can hold multiple values in a single + field have a key with a plural name. See :class:`Metadata` whose attributes + match the keys of this dictionary. + + Core metadata fields that can be specified multiple times are stored as a + list or dict depending on which is appropriate for the field. Any fields + which hold multiple values in a single field are stored as a list. + + """ + + # Metadata 1.0 - PEP 241 + metadata_version: str + name: str + version: str + platforms: List[str] + summary: str + description: str + keywords: List[str] + home_page: str + author: str + author_email: str + license: str + + # Metadata 1.1 - PEP 314 + supported_platforms: List[str] + download_url: str + classifiers: List[str] + requires: List[str] + provides: List[str] + obsoletes: List[str] + + # Metadata 1.2 - PEP 345 + maintainer: str + maintainer_email: str + requires_dist: List[str] + provides_dist: List[str] + obsoletes_dist: List[str] + requires_python: str + requires_external: List[str] + project_urls: Dict[str, str] + + # Metadata 2.0 + # PEP 426 attempted to completely revamp the metadata format + # but got stuck without ever being able to build consensus on + # it and ultimately ended up withdrawn. + # + # However, a number of tools had started emitting METADATA with + # `2.0` Metadata-Version, so for historical reasons, this version + # was skipped. + + # Metadata 2.1 - PEP 566 + description_content_type: str + provides_extra: List[str] + + # Metadata 2.2 - PEP 643 + dynamic: List[str] + + # Metadata 2.3 - PEP 685 + # No new fields were added in PEP 685, just some edge case were + # tightened up to provide better interoptability. + + +_STRING_FIELDS = { + "author", + "author_email", + "description", + "description_content_type", + "download_url", + "home_page", + "license", + "maintainer", + "maintainer_email", + "metadata_version", + "name", + "requires_python", + "summary", + "version", +} + +_LIST_FIELDS = { + "classifiers", + "dynamic", + "obsoletes", + "obsoletes_dist", + "platforms", + "provides", + "provides_dist", + "provides_extra", + "requires", + "requires_dist", + "requires_external", + "supported_platforms", +} + +_DICT_FIELDS = { + "project_urls", +} + + +def _parse_keywords(data: str) -> List[str]: + """Split a string of comma-separate keyboards into a list of keywords.""" + return [k.strip() for k in data.split(",")] + + +def _parse_project_urls(data: List[str]) -> Dict[str, str]: + """Parse a list of label/URL string pairings separated by a comma.""" + urls = {} + for pair in data: + # Our logic is slightly tricky here as we want to try and do + # *something* reasonable with malformed data. + # + # The main thing that we have to worry about, is data that does + # not have a ',' at all to split the label from the Value. There + # isn't a singular right answer here, and we will fail validation + # later on (if the caller is validating) so it doesn't *really* + # matter, but since the missing value has to be an empty str + # and our return value is dict[str, str], if we let the key + # be the missing value, then they'd have multiple '' values that + # overwrite each other in a accumulating dict. + # + # The other potentional issue is that it's possible to have the + # same label multiple times in the metadata, with no solid "right" + # answer with what to do in that case. As such, we'll do the only + # thing we can, which is treat the field as unparseable and add it + # to our list of unparsed fields. + parts = [p.strip() for p in pair.split(",", 1)] + parts.extend([""] * (max(0, 2 - len(parts)))) # Ensure 2 items + + # TODO: The spec doesn't say anything about if the keys should be + # considered case sensitive or not... logically they should + # be case-preserving and case-insensitive, but doing that + # would open up more cases where we might have duplicate + # entries. + label, url = parts + if label in urls: + # The label already exists in our set of urls, so this field + # is unparseable, and we can just add the whole thing to our + # unparseable data and stop processing it. + raise KeyError("duplicate labels in project urls") + urls[label] = url + + return urls + + +def _get_payload(msg: email.message.Message, source: Union[bytes, str]) -> str: + """Get the body of the message.""" + # If our source is a str, then our caller has managed encodings for us, + # and we don't need to deal with it. + if isinstance(source, str): + payload: str = msg.get_payload() + return payload + # If our source is a bytes, then we're managing the encoding and we need + # to deal with it. + else: + bpayload: bytes = msg.get_payload(decode=True) + try: + return bpayload.decode("utf8", "strict") + except UnicodeDecodeError: + raise ValueError("payload in an invalid encoding") + + +# The various parse_FORMAT functions here are intended to be as lenient as +# possible in their parsing, while still returning a correctly typed +# RawMetadata. +# +# To aid in this, we also generally want to do as little touching of the +# data as possible, except where there are possibly some historic holdovers +# that make valid data awkward to work with. +# +# While this is a lower level, intermediate format than our ``Metadata`` +# class, some light touch ups can make a massive difference in usability. + +# Map METADATA fields to RawMetadata. +_EMAIL_TO_RAW_MAPPING = { + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "description", + "description-content-type": "description_content_type", + "download-url": "download_url", + "dynamic": "dynamic", + "home-page": "home_page", + "keywords": "keywords", + "license": "license", + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "metadata-version": "metadata_version", + "name": "name", + "obsoletes": "obsoletes", + "obsoletes-dist": "obsoletes_dist", + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + "provides-dist": "provides_dist", + "provides-extra": "provides_extra", + "requires": "requires", + "requires-dist": "requires_dist", + "requires-external": "requires_external", + "requires-python": "requires_python", + "summary": "summary", + "supported-platform": "supported_platforms", + "version": "version", +} +_RAW_TO_EMAIL_MAPPING = {raw: email for email, raw in _EMAIL_TO_RAW_MAPPING.items()} + + +def parse_email(data: Union[bytes, str]) -> Tuple[RawMetadata, Dict[str, List[str]]]: + """Parse a distribution's metadata stored as email headers (e.g. from ``METADATA``). + + This function returns a two-item tuple of dicts. The first dict is of + recognized fields from the core metadata specification. Fields that can be + parsed and translated into Python's built-in types are converted + appropriately. All other fields are left as-is. Fields that are allowed to + appear multiple times are stored as lists. + + The second dict contains all other fields from the metadata. This includes + any unrecognized fields. It also includes any fields which are expected to + be parsed into a built-in type but were not formatted appropriately. Finally, + any fields that are expected to appear only once but are repeated are + included in this dict. + + """ + raw: Dict[str, Union[str, List[str], Dict[str, str]]] = {} + unparsed: Dict[str, List[str]] = {} + + if isinstance(data, str): + parsed = email.parser.Parser(policy=email.policy.compat32).parsestr(data) + else: + parsed = email.parser.BytesParser(policy=email.policy.compat32).parsebytes(data) + + # We have to wrap parsed.keys() in a set, because in the case of multiple + # values for a key (a list), the key will appear multiple times in the + # list of keys, but we're avoiding that by using get_all(). + for name in frozenset(parsed.keys()): + # Header names in RFC are case insensitive, so we'll normalize to all + # lower case to make comparisons easier. + name = name.lower() + + # We use get_all() here, even for fields that aren't multiple use, + # because otherwise someone could have e.g. two Name fields, and we + # would just silently ignore it rather than doing something about it. + headers = parsed.get_all(name) or [] + + # The way the email module works when parsing bytes is that it + # unconditionally decodes the bytes as ascii using the surrogateescape + # handler. When you pull that data back out (such as with get_all() ), + # it looks to see if the str has any surrogate escapes, and if it does + # it wraps it in a Header object instead of returning the string. + # + # As such, we'll look for those Header objects, and fix up the encoding. + value = [] + # Flag if we have run into any issues processing the headers, thus + # signalling that the data belongs in 'unparsed'. + valid_encoding = True + for h in headers: + # It's unclear if this can return more types than just a Header or + # a str, so we'll just assert here to make sure. + assert isinstance(h, (email.header.Header, str)) + + # If it's a header object, we need to do our little dance to get + # the real data out of it. In cases where there is invalid data + # we're going to end up with mojibake, but there's no obvious, good + # way around that without reimplementing parts of the Header object + # ourselves. + # + # That should be fine since, if mojibacked happens, this key is + # going into the unparsed dict anyways. + if isinstance(h, email.header.Header): + # The Header object stores it's data as chunks, and each chunk + # can be independently encoded, so we'll need to check each + # of them. + chunks: List[Tuple[bytes, Optional[str]]] = [] + for bin, encoding in email.header.decode_header(h): + try: + bin.decode("utf8", "strict") + except UnicodeDecodeError: + # Enable mojibake. + encoding = "latin1" + valid_encoding = False + else: + encoding = "utf8" + chunks.append((bin, encoding)) + + # Turn our chunks back into a Header object, then let that + # Header object do the right thing to turn them into a + # string for us. + value.append(str(email.header.make_header(chunks))) + # This is already a string, so just add it. + else: + value.append(h) + + # We've processed all of our values to get them into a list of str, + # but we may have mojibake data, in which case this is an unparsed + # field. + if not valid_encoding: + unparsed[name] = value + continue + + raw_name = _EMAIL_TO_RAW_MAPPING.get(name) + if raw_name is None: + # This is a bit of a weird situation, we've encountered a key that + # we don't know what it means, so we don't know whether it's meant + # to be a list or not. + # + # Since we can't really tell one way or another, we'll just leave it + # as a list, even though it may be a single item list, because that's + # what makes the most sense for email headers. + unparsed[name] = value + continue + + # If this is one of our string fields, then we'll check to see if our + # value is a list of a single item. If it is then we'll assume that + # it was emitted as a single string, and unwrap the str from inside + # the list. + # + # If it's any other kind of data, then we haven't the faintest clue + # what we should parse it as, and we have to just add it to our list + # of unparsed stuff. + if raw_name in _STRING_FIELDS and len(value) == 1: + raw[raw_name] = value[0] + # If this is one of our list of string fields, then we can just assign + # the value, since email *only* has strings, and our get_all() call + # above ensures that this is a list. + elif raw_name in _LIST_FIELDS: + raw[raw_name] = value + # Special Case: Keywords + # The keywords field is implemented in the metadata spec as a str, + # but it conceptually is a list of strings, and is serialized using + # ", ".join(keywords), so we'll do some light data massaging to turn + # this into what it logically is. + elif raw_name == "keywords" and len(value) == 1: + raw[raw_name] = _parse_keywords(value[0]) + # Special Case: Project-URL + # The project urls is implemented in the metadata spec as a list of + # specially-formatted strings that represent a key and a value, which + # is fundamentally a mapping, however the email format doesn't support + # mappings in a sane way, so it was crammed into a list of strings + # instead. + # + # We will do a little light data massaging to turn this into a map as + # it logically should be. + elif raw_name == "project_urls": + try: + raw[raw_name] = _parse_project_urls(value) + except KeyError: + unparsed[name] = value + # Nothing that we've done has managed to parse this, so it'll just + # throw it in our unparseable data and move on. + else: + unparsed[name] = value + + # We need to support getting the Description from the message payload in + # addition to getting it from the the headers. This does mean, though, there + # is the possibility of it being set both ways, in which case we put both + # in 'unparsed' since we don't know which is right. + try: + payload = _get_payload(parsed, data) + except ValueError: + unparsed.setdefault("description", []).append( + parsed.get_payload(decode=isinstance(data, bytes)) + ) + else: + if payload: + # Check to see if we've already got a description, if so then both + # it, and this body move to unparseable. + if "description" in raw: + description_header = cast(str, raw.pop("description")) + unparsed.setdefault("description", []).extend( + [description_header, payload] + ) + elif "description" in unparsed: + unparsed["description"].append(payload) + else: + raw["description"] = payload + + # We need to cast our `raw` to a metadata, because a TypedDict only support + # literal key names, but we're computing our key names on purpose, but the + # way this function is implemented, our `TypedDict` can only have valid key + # names. + return cast(RawMetadata, raw), unparsed + + +_NOT_FOUND = object() + + +# Keep the two values in sync. +_VALID_METADATA_VERSIONS = ["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] +_MetadataVersion = Literal["1.0", "1.1", "1.2", "2.1", "2.2", "2.3"] + +_REQUIRED_ATTRS = frozenset(["metadata_version", "name", "version"]) + + +class _Validator(Generic[T]): + """Validate a metadata field. + + All _process_*() methods correspond to a core metadata field. The method is + called with the field's raw value. If the raw value is valid it is returned + in its "enriched" form (e.g. ``version.Version`` for the ``Version`` field). + If the raw value is invalid, :exc:`InvalidMetadata` is raised (with a cause + as appropriate). + """ + + name: str + raw_name: str + added: _MetadataVersion + + def __init__( + self, + *, + added: _MetadataVersion = "1.0", + ) -> None: + self.added = added + + def __set_name__(self, _owner: "Metadata", name: str) -> None: + self.name = name + self.raw_name = _RAW_TO_EMAIL_MAPPING[name] + + def __get__(self, instance: "Metadata", _owner: Type["Metadata"]) -> T: + # With Python 3.8, the caching can be replaced with functools.cached_property(). + # No need to check the cache as attribute lookup will resolve into the + # instance's __dict__ before __get__ is called. + cache = instance.__dict__ + try: + value = instance._raw[self.name] # type: ignore[literal-required] + except KeyError: + if self.name in _STRING_FIELDS: + value = "" + elif self.name in _LIST_FIELDS: + value = [] + elif self.name in _DICT_FIELDS: + value = {} + else: # pragma: no cover + assert False + + try: + converter: Callable[[Any], T] = getattr(self, f"_process_{self.name}") + except AttributeError: + pass + else: + value = converter(value) + + cache[self.name] = value + try: + del instance._raw[self.name] # type: ignore[misc] + except KeyError: + pass + + return cast(T, value) + + def _invalid_metadata( + self, msg: str, cause: Optional[Exception] = None + ) -> InvalidMetadata: + exc = InvalidMetadata( + self.raw_name, msg.format_map({"field": repr(self.raw_name)}) + ) + exc.__cause__ = cause + return exc + + def _process_metadata_version(self, value: str) -> _MetadataVersion: + # Implicitly makes Metadata-Version required. + if value not in _VALID_METADATA_VERSIONS: + raise self._invalid_metadata(f"{value!r} is not a valid metadata version") + return cast(_MetadataVersion, value) + + def _process_name(self, value: str) -> str: + if not value: + raise self._invalid_metadata("{field} is a required field") + # Validate the name as a side-effect. + try: + utils.canonicalize_name(value, validate=True) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + else: + return value + + def _process_version(self, value: str) -> version_module.Version: + if not value: + raise self._invalid_metadata("{field} is a required field") + try: + return version_module.parse(value) + except version_module.InvalidVersion as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_summary(self, value: str) -> str: + """Check the field contains no newlines.""" + if "\n" in value: + raise self._invalid_metadata("{field} must be a single line") + return value + + def _process_description_content_type(self, value: str) -> str: + content_types = {"text/plain", "text/x-rst", "text/markdown"} + message = email.message.EmailMessage() + message["content-type"] = value + + content_type, parameters = ( + # Defaults to `text/plain` if parsing failed. + message.get_content_type().lower(), + message["content-type"].params, + ) + # Check if content-type is valid or defaulted to `text/plain` and thus was + # not parseable. + if content_type not in content_types or content_type not in value.lower(): + raise self._invalid_metadata( + f"{{field}} must be one of {list(content_types)}, not {value!r}" + ) + + charset = parameters.get("charset", "UTF-8") + if charset != "UTF-8": + raise self._invalid_metadata( + f"{{field}} can only specify the UTF-8 charset, not {list(charset)}" + ) + + markdown_variants = {"GFM", "CommonMark"} + variant = parameters.get("variant", "GFM") # Use an acceptable default. + if content_type == "text/markdown" and variant not in markdown_variants: + raise self._invalid_metadata( + f"valid Markdown variants for {{field}} are {list(markdown_variants)}, " + f"not {variant!r}", + ) + return value + + def _process_dynamic(self, value: List[str]) -> List[str]: + for dynamic_field in map(str.lower, value): + if dynamic_field in {"name", "version", "metadata-version"}: + raise self._invalid_metadata( + f"{value!r} is not allowed as a dynamic field" + ) + elif dynamic_field not in _EMAIL_TO_RAW_MAPPING: + raise self._invalid_metadata(f"{value!r} is not a valid dynamic field") + return list(map(str.lower, value)) + + def _process_provides_extra( + self, + value: List[str], + ) -> List[utils.NormalizedName]: + normalized_names = [] + try: + for name in value: + normalized_names.append(utils.canonicalize_name(name, validate=True)) + except utils.InvalidName as exc: + raise self._invalid_metadata( + f"{name!r} is invalid for {{field}}", cause=exc + ) + else: + return normalized_names + + def _process_requires_python(self, value: str) -> specifiers.SpecifierSet: + try: + return specifiers.SpecifierSet(value) + except specifiers.InvalidSpecifier as exc: + raise self._invalid_metadata( + f"{value!r} is invalid for {{field}}", cause=exc + ) + + def _process_requires_dist( + self, + value: List[str], + ) -> List[requirements.Requirement]: + reqs = [] + try: + for req in value: + reqs.append(requirements.Requirement(req)) + except requirements.InvalidRequirement as exc: + raise self._invalid_metadata(f"{req!r} is invalid for {{field}}", cause=exc) + else: + return reqs + + +class Metadata: + """Representation of distribution metadata. + + Compared to :class:`RawMetadata`, this class provides objects representing + metadata fields instead of only using built-in types. Any invalid metadata + will cause :exc:`InvalidMetadata` to be raised (with a + :py:attr:`~BaseException.__cause__` attribute as appropriate). + """ + + _raw: RawMetadata + + @classmethod + def from_raw(cls, data: RawMetadata, *, validate: bool = True) -> "Metadata": + """Create an instance from :class:`RawMetadata`. + + If *validate* is true, all metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + ins = cls() + ins._raw = data.copy() # Mutations occur due to caching enriched values. + + if validate: + exceptions: List[InvalidMetadata] = [] + try: + metadata_version = ins.metadata_version + metadata_age = _VALID_METADATA_VERSIONS.index(metadata_version) + except InvalidMetadata as metadata_version_exc: + exceptions.append(metadata_version_exc) + metadata_version = None + + # Make sure to check for the fields that are present, the required + # fields (so their absence can be reported). + fields_to_check = frozenset(ins._raw) | _REQUIRED_ATTRS + # Remove fields that have already been checked. + fields_to_check -= {"metadata_version"} + + for key in fields_to_check: + try: + if metadata_version: + # Can't use getattr() as that triggers descriptor protocol which + # will fail due to no value for the instance argument. + try: + field_metadata_version = cls.__dict__[key].added + except KeyError: + exc = InvalidMetadata(key, f"unrecognized field: {key!r}") + exceptions.append(exc) + continue + field_age = _VALID_METADATA_VERSIONS.index( + field_metadata_version + ) + if field_age > metadata_age: + field = _RAW_TO_EMAIL_MAPPING[key] + exc = InvalidMetadata( + field, + "{field} introduced in metadata version " + "{field_metadata_version}, not {metadata_version}", + ) + exceptions.append(exc) + continue + getattr(ins, key) + except InvalidMetadata as exc: + exceptions.append(exc) + + if exceptions: + raise ExceptionGroup("invalid metadata", exceptions) + + return ins + + @classmethod + def from_email( + cls, data: Union[bytes, str], *, validate: bool = True + ) -> "Metadata": + """Parse metadata from email headers. + + If *validate* is true, the metadata will be validated. All exceptions + related to validation will be gathered and raised as an :class:`ExceptionGroup`. + """ + exceptions: list[InvalidMetadata] = [] + raw, unparsed = parse_email(data) + + if validate: + for unparsed_key in unparsed: + if unparsed_key in _EMAIL_TO_RAW_MAPPING: + message = f"{unparsed_key!r} has invalid data" + else: + message = f"unrecognized field: {unparsed_key!r}" + exceptions.append(InvalidMetadata(unparsed_key, message)) + + if exceptions: + raise ExceptionGroup("unparsed", exceptions) + + try: + return cls.from_raw(raw, validate=validate) + except ExceptionGroup as exc_group: + exceptions.extend(exc_group.exceptions) + raise ExceptionGroup("invalid or unparsed metadata", exceptions) from None + + metadata_version: _Validator[_MetadataVersion] = _Validator() + """:external:ref:`core-metadata-metadata-version` + (required; validated to be a valid metadata version)""" + name: _Validator[str] = _Validator() + """:external:ref:`core-metadata-name` + (required; validated using :func:`~packaging.utils.canonicalize_name` and its + *validate* parameter)""" + version: _Validator[version_module.Version] = _Validator() + """:external:ref:`core-metadata-version` (required)""" + dynamic: _Validator[List[str]] = _Validator( + added="2.2", + ) + """:external:ref:`core-metadata-dynamic` + (validated against core metadata field names and lowercased)""" + platforms: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-platform`""" + supported_platforms: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-supported-platform`""" + summary: _Validator[str] = _Validator() + """:external:ref:`core-metadata-summary` (validated to contain no newlines)""" + description: _Validator[str] = _Validator() # TODO 2.1: can be in body + """:external:ref:`core-metadata-description`""" + description_content_type: _Validator[str] = _Validator(added="2.1") + """:external:ref:`core-metadata-description-content-type` (validated)""" + keywords: _Validator[List[str]] = _Validator() + """:external:ref:`core-metadata-keywords`""" + home_page: _Validator[str] = _Validator() + """:external:ref:`core-metadata-home-page`""" + download_url: _Validator[str] = _Validator(added="1.1") + """:external:ref:`core-metadata-download-url`""" + author: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author`""" + author_email: _Validator[str] = _Validator() + """:external:ref:`core-metadata-author-email`""" + maintainer: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer`""" + maintainer_email: _Validator[str] = _Validator(added="1.2") + """:external:ref:`core-metadata-maintainer-email`""" + license: _Validator[str] = _Validator() + """:external:ref:`core-metadata-license`""" + classifiers: _Validator[List[str]] = _Validator(added="1.1") + """:external:ref:`core-metadata-classifier`""" + requires_dist: _Validator[List[requirements.Requirement]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-dist`""" + requires_python: _Validator[specifiers.SpecifierSet] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-python`""" + # Because `Requires-External` allows for non-PEP 440 version specifiers, we + # don't do any processing on the values. + requires_external: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-requires-external`""" + project_urls: _Validator[Dict[str, str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-project-url`""" + # PEP 685 lets us raise an error if an extra doesn't pass `Name` validation + # regardless of metadata version. + provides_extra: _Validator[List[utils.NormalizedName]] = _Validator( + added="2.1", + ) + """:external:ref:`core-metadata-provides-extra`""" + provides_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-provides-dist`""" + obsoletes_dist: _Validator[List[str]] = _Validator(added="1.2") + """:external:ref:`core-metadata-obsoletes-dist`""" + requires: _Validator[List[str]] = _Validator(added="1.1") + """``Requires`` (deprecated)""" + provides: _Validator[List[str]] = _Validator(added="1.1") + """``Provides`` (deprecated)""" + obsoletes: _Validator[List[str]] = _Validator(added="1.1") + """``Obsoletes`` (deprecated)""" diff --git a/src/pip/_vendor/packaging/requirements.py b/src/pip/_vendor/packaging/requirements.py index 1eab7dd66d9..0c00eba331b 100644 --- a/src/pip/_vendor/packaging/requirements.py +++ b/src/pip/_vendor/packaging/requirements.py @@ -2,26 +2,13 @@ # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. -import re -import string -import urllib.parse -from typing import List, Optional as TOptional, Set - -from pip._vendor.pyparsing import ( # noqa - Combine, - Literal as L, - Optional, - ParseException, - Regex, - Word, - ZeroOrMore, - originalTextFor, - stringEnd, - stringStart, -) - -from .markers import MARKER_EXPR, Marker -from .specifiers import LegacySpecifier, Specifier, SpecifierSet +from typing import Any, Iterator, Optional, Set + +from ._parser import parse_requirement as _parse_requirement +from ._tokenizer import ParserSyntaxError +from .markers import Marker, _normalize_extra_values +from .specifiers import SpecifierSet +from .utils import canonicalize_name class InvalidRequirement(ValueError): @@ -30,60 +17,6 @@ class InvalidRequirement(ValueError): """ -ALPHANUM = Word(string.ascii_letters + string.digits) - -LBRACKET = L("[").suppress() -RBRACKET = L("]").suppress() -LPAREN = L("(").suppress() -RPAREN = L(")").suppress() -COMMA = L(",").suppress() -SEMICOLON = L(";").suppress() -AT = L("@").suppress() - -PUNCTUATION = Word("-_.") -IDENTIFIER_END = ALPHANUM | (ZeroOrMore(PUNCTUATION) + ALPHANUM) -IDENTIFIER = Combine(ALPHANUM + ZeroOrMore(IDENTIFIER_END)) - -NAME = IDENTIFIER("name") -EXTRA = IDENTIFIER - -URI = Regex(r"[^ ]+")("url") -URL = AT + URI - -EXTRAS_LIST = EXTRA + ZeroOrMore(COMMA + EXTRA) -EXTRAS = (LBRACKET + Optional(EXTRAS_LIST) + RBRACKET)("extras") - -VERSION_PEP440 = Regex(Specifier._regex_str, re.VERBOSE | re.IGNORECASE) -VERSION_LEGACY = Regex(LegacySpecifier._regex_str, re.VERBOSE | re.IGNORECASE) - -VERSION_ONE = VERSION_PEP440 ^ VERSION_LEGACY -VERSION_MANY = Combine( - VERSION_ONE + ZeroOrMore(COMMA + VERSION_ONE), joinString=",", adjacent=False -)("_raw_spec") -_VERSION_SPEC = Optional((LPAREN + VERSION_MANY + RPAREN) | VERSION_MANY) -_VERSION_SPEC.setParseAction(lambda s, l, t: t._raw_spec or "") - -VERSION_SPEC = originalTextFor(_VERSION_SPEC)("specifier") -VERSION_SPEC.setParseAction(lambda s, l, t: t[1]) - -MARKER_EXPR = originalTextFor(MARKER_EXPR())("marker") -MARKER_EXPR.setParseAction( - lambda s, l, t: Marker(s[t._original_start : t._original_end]) -) -MARKER_SEPARATOR = SEMICOLON -MARKER = MARKER_SEPARATOR + MARKER_EXPR - -VERSION_AND_MARKER = VERSION_SPEC + Optional(MARKER) -URL_AND_MARKER = URL + Optional(MARKER) - -NAMED_REQUIREMENT = NAME + Optional(EXTRAS) + (URL_AND_MARKER | VERSION_AND_MARKER) - -REQUIREMENT = stringStart + NAMED_REQUIREMENT + stringEnd -# pyparsing isn't thread safe during initialization, so we do it eagerly, see -# issue #104 -REQUIREMENT.parseString("x[]") - - class Requirement: """Parse a requirement. @@ -99,48 +32,59 @@ class Requirement: def __init__(self, requirement_string: str) -> None: try: - req = REQUIREMENT.parseString(requirement_string) - except ParseException as e: - raise InvalidRequirement( - f'Parse error at "{ requirement_string[e.loc : e.loc + 8]!r}": {e.msg}' - ) - - self.name: str = req.name - if req.url: - parsed_url = urllib.parse.urlparse(req.url) - if parsed_url.scheme == "file": - if urllib.parse.urlunparse(parsed_url) != req.url: - raise InvalidRequirement("Invalid URL given") - elif not (parsed_url.scheme and parsed_url.netloc) or ( - not parsed_url.scheme and not parsed_url.netloc - ): - raise InvalidRequirement(f"Invalid URL: {req.url}") - self.url: TOptional[str] = req.url - else: - self.url = None - self.extras: Set[str] = set(req.extras.asList() if req.extras else []) - self.specifier: SpecifierSet = SpecifierSet(req.specifier) - self.marker: TOptional[Marker] = req.marker if req.marker else None - - def __str__(self) -> str: - parts: List[str] = [self.name] + parsed = _parse_requirement(requirement_string) + except ParserSyntaxError as e: + raise InvalidRequirement(str(e)) from e + + self.name: str = parsed.name + self.url: Optional[str] = parsed.url or None + self.extras: Set[str] = set(parsed.extras if parsed.extras else []) + self.specifier: SpecifierSet = SpecifierSet(parsed.specifier) + self.marker: Optional[Marker] = None + if parsed.marker is not None: + self.marker = Marker.__new__(Marker) + self.marker._markers = _normalize_extra_values(parsed.marker) + + def _iter_parts(self, name: str) -> Iterator[str]: + yield name if self.extras: formatted_extras = ",".join(sorted(self.extras)) - parts.append(f"[{formatted_extras}]") + yield f"[{formatted_extras}]" if self.specifier: - parts.append(str(self.specifier)) + yield str(self.specifier) if self.url: - parts.append(f"@ {self.url}") + yield f"@ {self.url}" if self.marker: - parts.append(" ") + yield " " if self.marker: - parts.append(f"; {self.marker}") + yield f"; {self.marker}" - return "".join(parts) + def __str__(self) -> str: + return "".join(self._iter_parts(self.name)) def __repr__(self) -> str: return f"" + + def __hash__(self) -> int: + return hash( + ( + self.__class__.__name__, + *self._iter_parts(canonicalize_name(self.name)), + ) + ) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, Requirement): + return NotImplemented + + return ( + canonicalize_name(self.name) == canonicalize_name(other.name) + and self.extras == other.extras + and self.specifier == other.specifier + and self.url == other.url + and self.marker == other.marker + ) diff --git a/src/pip/_vendor/packaging/specifiers.py b/src/pip/_vendor/packaging/specifiers.py index 0e218a6f9f7..7617726c385 100644 --- a/src/pip/_vendor/packaging/specifiers.py +++ b/src/pip/_vendor/packaging/specifiers.py @@ -1,20 +1,22 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from pip._vendor.packaging.specifiers import Specifier, SpecifierSet, InvalidSpecifier + from pip._vendor.packaging.version import Version +""" import abc -import functools import itertools import re -import warnings from typing import ( Callable, - Dict, Iterable, Iterator, List, Optional, - Pattern, Set, Tuple, TypeVar, @@ -22,17 +24,28 @@ ) from .utils import canonicalize_version -from .version import LegacyVersion, Version, parse +from .version import Version + +UnparsedVersion = Union[Version, str] +UnparsedVersionVar = TypeVar("UnparsedVersionVar", bound=UnparsedVersion) +CallableOperator = Callable[[Version, str], bool] -ParsedVersion = Union[Version, LegacyVersion] -UnparsedVersion = Union[Version, LegacyVersion, str] -VersionTypeVar = TypeVar("VersionTypeVar", bound=UnparsedVersion) -CallableOperator = Callable[[ParsedVersion, str], bool] + +def _coerce_version(version: UnparsedVersion) -> Version: + if not isinstance(version, Version): + version = Version(version) + return version class InvalidSpecifier(ValueError): """ - An invalid specifier was found, users should refer to PEP 440. + Raised when attempting to create a :class:`Specifier` with a specifier + string that is invalid. + + >>> Specifier("lolwat") + Traceback (most recent call last): + ... + packaging.specifiers.InvalidSpecifier: Invalid specifier: 'lolwat' """ @@ -40,35 +53,39 @@ class BaseSpecifier(metaclass=abc.ABCMeta): @abc.abstractmethod def __str__(self) -> str: """ - Returns the str representation of this Specifier like object. This + Returns the str representation of this Specifier-like object. This should be representative of the Specifier itself. """ @abc.abstractmethod def __hash__(self) -> int: """ - Returns a hash value for this Specifier like object. + Returns a hash value for this Specifier-like object. """ @abc.abstractmethod def __eq__(self, other: object) -> bool: """ - Returns a boolean representing whether or not the two Specifier like + Returns a boolean representing whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. """ - @abc.abstractproperty + @property + @abc.abstractmethod def prereleases(self) -> Optional[bool]: - """ - Returns whether or not pre-releases as a whole are allowed by this - specifier. + """Whether or not pre-releases as a whole are allowed. + + This can be set to either ``True`` or ``False`` to explicitly enable or disable + prereleases or it can be set to ``None`` (the default) to use default semantics. """ @prereleases.setter def prereleases(self, value: bool) -> None: - """ - Sets whether or not pre-releases as a whole are allowed by this - specifier. + """Setter for :attr:`prereleases`. + + :param value: The value to set. """ @abc.abstractmethod @@ -79,227 +96,28 @@ def contains(self, item: str, prereleases: Optional[bool] = None) -> bool: @abc.abstractmethod def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: """ Takes an iterable of items and filters them so that only items which are contained within this specifier are allowed in it. """ -class _IndividualSpecifier(BaseSpecifier): - - _operators: Dict[str, str] = {} - _regex: Pattern[str] - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - match = self._regex.search(spec) - if not match: - raise InvalidSpecifier(f"Invalid specifier: '{spec}'") - - self._spec: Tuple[str, str] = ( - match.group("operator").strip(), - match.group("version").strip(), - ) - - # Store whether or not this Specifier should accept prereleases - self._prereleases = prereleases - - def __repr__(self) -> str: - pre = ( - f", prereleases={self.prereleases!r}" - if self._prereleases is not None - else "" - ) - - return f"<{self.__class__.__name__}({str(self)!r}{pre})>" - - def __str__(self) -> str: - return "{}{}".format(*self._spec) - - @property - def _canonical_spec(self) -> Tuple[str, str]: - return self._spec[0], canonicalize_version(self._spec[1]) - - def __hash__(self) -> int: - return hash(self._canonical_spec) - - def __eq__(self, other: object) -> bool: - if isinstance(other, str): - try: - other = self.__class__(str(other)) - except InvalidSpecifier: - return NotImplemented - elif not isinstance(other, self.__class__): - return NotImplemented - - return self._canonical_spec == other._canonical_spec - - def _get_operator(self, op: str) -> CallableOperator: - operator_callable: CallableOperator = getattr( - self, f"_compare_{self._operators[op]}" - ) - return operator_callable - - def _coerce_version(self, version: UnparsedVersion) -> ParsedVersion: - if not isinstance(version, (LegacyVersion, Version)): - version = parse(version) - return version +class Specifier(BaseSpecifier): + """This class abstracts handling of version specifiers. - @property - def operator(self) -> str: - return self._spec[0] + .. tip:: - @property - def version(self) -> str: - return self._spec[1] - - @property - def prereleases(self) -> Optional[bool]: - return self._prereleases - - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value - - def __contains__(self, item: str) -> bool: - return self.contains(item) - - def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None - ) -> bool: - - # Determine if prereleases are to be allowed or not. - if prereleases is None: - prereleases = self.prereleases - - # Normalize item to a Version or LegacyVersion, this allows us to have - # a shortcut for ``"2.0" in Specifier(">=2") - normalized_item = self._coerce_version(item) - - # Determine if we should be supporting prereleases in this specifier - # or not, if we do not support prereleases than we can short circuit - # logic if this version is a prereleases. - if normalized_item.is_prerelease and not prereleases: - return False - - # Actually do the comparison to determine if this item is contained - # within this Specifier or not. - operator_callable: CallableOperator = self._get_operator(self.operator) - return operator_callable(normalized_item, self.version) - - def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - - yielded = False - found_prereleases = [] - - kw = {"prereleases": prereleases if prereleases is not None else True} - - # Attempt to iterate over all the values in the iterable and if any of - # them match, yield them. - for version in iterable: - parsed_version = self._coerce_version(version) - - if self.contains(parsed_version, **kw): - # If our version is a prerelease, and we were not set to allow - # prereleases, then we'll store it for later in case nothing - # else matches this specifier. - if parsed_version.is_prerelease and not ( - prereleases or self.prereleases - ): - found_prereleases.append(version) - # Either this is not a prerelease, or we should have been - # accepting prereleases from the beginning. - else: - yielded = True - yield version - - # Now that we've iterated over everything, determine if we've yielded - # any values, and if we have not and we have any prereleases stored up - # then we will go ahead and yield the prereleases. - if not yielded and found_prereleases: - for version in found_prereleases: - yield version - - -class LegacySpecifier(_IndividualSpecifier): - - _regex_str = r""" - (?P(==|!=|<=|>=|<|>)) - \s* - (?P - [^,;\s)]* # Since this is a "legacy" specifier, and the version - # string can be just about anything, we match everything - # except for whitespace, a semi-colon for marker support, - # a closing paren since versions can be enclosed in - # them, and a comma since it's a version separator. - ) - """ - - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) - - _operators = { - "==": "equal", - "!=": "not_equal", - "<=": "less_than_equal", - ">=": "greater_than_equal", - "<": "less_than", - ">": "greater_than", - } - - def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: - super().__init__(spec, prereleases) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def _coerce_version(self, version: UnparsedVersion) -> LegacyVersion: - if not isinstance(version, LegacyVersion): - version = LegacyVersion(str(version)) - return version - - def _compare_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective == self._coerce_version(spec) - - def _compare_not_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective != self._coerce_version(spec) - - def _compare_less_than_equal(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective <= self._coerce_version(spec) - - def _compare_greater_than_equal( - self, prospective: LegacyVersion, spec: str - ) -> bool: - return prospective >= self._coerce_version(spec) - - def _compare_less_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective < self._coerce_version(spec) - - def _compare_greater_than(self, prospective: LegacyVersion, spec: str) -> bool: - return prospective > self._coerce_version(spec) - - -def _require_version_compare( - fn: Callable[["Specifier", ParsedVersion, str], bool] -) -> Callable[["Specifier", ParsedVersion, str], bool]: - @functools.wraps(fn) - def wrapped(self: "Specifier", prospective: ParsedVersion, spec: str) -> bool: - if not isinstance(prospective, Version): - return False - return fn(self, prospective, spec) - - return wrapped - - -class Specifier(_IndividualSpecifier): + It is generally not required to instantiate this manually. You should instead + prefer to work with :class:`SpecifierSet` instead, which can parse + comma-separated version specifiers (which is what package metadata contains). + """ - _regex_str = r""" + _operator_regex_str = r""" (?P(~=|==|!=|<=|>=|<|>|===)) + """ + _version_regex_str = r""" (?P (?: # The identity operators allow for an escape hatch that will @@ -309,8 +127,10 @@ class Specifier(_IndividualSpecifier): # but included entirely as an escape hatch. (?<====) # Only match for the identity operator \s* - [^\s]* # We just match everything, except for whitespace - # since we are only testing for strict identity. + [^\s;)]* # The arbitrary version can be just about anything, + # we match everything except for whitespace, a + # semi-colon for marker support, and a closing paren + # since versions can be enclosed in them. ) | (?: @@ -323,23 +143,23 @@ class Specifier(_IndividualSpecifier): v? (?:[0-9]+!)? # epoch [0-9]+(?:\.[0-9]+)* # release - (?: # pre release - [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) - [-_\.]? - [0-9]* - )? - (?: # post release - (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) - )? - # You cannot use a wild card and a dev or local version - # together so group them with a | and make them optional. + # You cannot use a wild card and a pre-release, post-release, a dev or + # local version together so group them with a | and make them optional. (?: + \.\* # Wild card syntax of .* + | + (?: # pre release + [-_\.]? + (alpha|beta|preview|pre|a|b|c|rc) + [-_\.]? + [0-9]* + )? + (?: # post release + (?:-[0-9]+)|(?:[-_\.]?(post|rev|r)[-_\.]?[0-9]*) + )? (?:[-_\.]?dev[-_\.]?[0-9]*)? # dev release (?:\+[a-z0-9]+(?:[-_\.][a-z0-9]+)*)? # local - | - \.\* # Wild card syntax of .* )? ) | @@ -354,7 +174,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)+ # release (We have a + instead of a *) (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -379,7 +199,7 @@ class Specifier(_IndividualSpecifier): [0-9]+(?:\.[0-9]+)* # release (?: # pre release [-_\.]? - (a|b|c|rc|alpha|beta|pre|preview) + (alpha|beta|preview|pre|a|b|c|rc) [-_\.]? [0-9]* )? @@ -391,7 +211,10 @@ class Specifier(_IndividualSpecifier): ) """ - _regex = re.compile(r"^\s*" + _regex_str + r"\s*$", re.VERBOSE | re.IGNORECASE) + _regex = re.compile( + r"^\s*" + _operator_regex_str + _version_regex_str + r"\s*$", + re.VERBOSE | re.IGNORECASE, + ) _operators = { "~=": "compatible", @@ -404,8 +227,153 @@ class Specifier(_IndividualSpecifier): "===": "arbitrary", } - @_require_version_compare - def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: + def __init__(self, spec: str = "", prereleases: Optional[bool] = None) -> None: + """Initialize a Specifier instance. + + :param spec: + The string representation of a specifier which will be parsed and + normalized before use. + :param prereleases: + This tells the specifier if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + :raises InvalidSpecifier: + If the given specifier is invalid (i.e. bad syntax). + """ + match = self._regex.search(spec) + if not match: + raise InvalidSpecifier(f"Invalid specifier: '{spec}'") + + self._spec: Tuple[str, str] = ( + match.group("operator").strip(), + match.group("version").strip(), + ) + + # Store whether or not this Specifier should accept prereleases + self._prereleases = prereleases + + # https://github.com/python/mypy/pull/13475#pullrequestreview-1079784515 + @property # type: ignore[override] + def prereleases(self) -> bool: + # If there is an explicit prereleases set for this, then we'll just + # blindly use that. + if self._prereleases is not None: + return self._prereleases + + # Look at all of our specifiers and determine if they are inclusive + # operators, and if they are if they are including an explicit + # prerelease. + operator, version = self._spec + if operator in ["==", ">=", "<=", "~=", "==="]: + # The == specifier can include a trailing .*, if it does we + # want to remove before parsing. + if operator == "==" and version.endswith(".*"): + version = version[:-2] + + # Parse the version, and if it is a pre-release than this + # specifier allows pre-releases. + if Version(version).is_prerelease: + return True + + return False + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + + @property + def operator(self) -> str: + """The operator of this specifier. + + >>> Specifier("==1.2.3").operator + '==' + """ + return self._spec[0] + + @property + def version(self) -> str: + """The version of this specifier. + + >>> Specifier("==1.2.3").version + '1.2.3' + """ + return self._spec[1] + + def __repr__(self) -> str: + """A representation of the Specifier that shows all internal state. + + >>> Specifier('>=1.0.0') + =1.0.0')> + >>> Specifier('>=1.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> Specifier('>=1.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ + pre = ( + f", prereleases={self.prereleases!r}" + if self._prereleases is not None + else "" + ) + + return f"<{self.__class__.__name__}({str(self)!r}{pre})>" + + def __str__(self) -> str: + """A string representation of the Specifier that can be round-tripped. + + >>> str(Specifier('>=1.0.0')) + '>=1.0.0' + >>> str(Specifier('>=1.0.0', prereleases=False)) + '>=1.0.0' + """ + return "{}{}".format(*self._spec) + + @property + def _canonical_spec(self) -> Tuple[str, str]: + canonical_version = canonicalize_version( + self._spec[1], + strip_trailing_zero=(self._spec[0] != "~="), + ) + return self._spec[0], canonical_version + + def __hash__(self) -> int: + return hash(self._canonical_spec) + + def __eq__(self, other: object) -> bool: + """Whether or not the two Specifier-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> Specifier("==1.2.3") == Specifier("== 1.2.3.0") + True + >>> (Specifier("==1.2.3", prereleases=False) == + ... Specifier("==1.2.3", prereleases=True)) + True + >>> Specifier("==1.2.3") == "==1.2.3" + True + >>> Specifier("==1.2.3") == Specifier("==1.2.4") + False + >>> Specifier("==1.2.3") == Specifier("~=1.2.3") + False + """ + if isinstance(other, str): + try: + other = self.__class__(str(other)) + except InvalidSpecifier: + return NotImplemented + elif not isinstance(other, self.__class__): + return NotImplemented + + return self._canonical_spec == other._canonical_spec + + def _get_operator(self, op: str) -> CallableOperator: + operator_callable: CallableOperator = getattr( + self, f"_compare_{self._operators[op]}" + ) + return operator_callable + + def _compare_compatible(self, prospective: Version, spec: str) -> bool: # Compatible releases have an equivalent combination of >= and ==. That # is that ~=2.2 is equivalent to >=2.2,==2.*. This allows us to @@ -426,34 +394,35 @@ def _compare_compatible(self, prospective: ParsedVersion, spec: str) -> bool: prospective, prefix ) - @_require_version_compare - def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_equal(self, prospective: Version, spec: str) -> bool: # We need special logic to handle prefix matching if spec.endswith(".*"): # In the case of prefix matching we want to ignore local segment. - prospective = Version(prospective.public) + normalized_prospective = canonicalize_version( + prospective.public, strip_trailing_zero=False + ) + # Get the normalized version string ignoring the trailing .* + normalized_spec = canonicalize_version(spec[:-2], strip_trailing_zero=False) # Split the spec out by dots, and pretend that there is an implicit # dot in between a release segment and a pre-release segment. - split_spec = _version_split(spec[:-2]) # Remove the trailing .* + split_spec = _version_split(normalized_spec) # Split the prospective version out by dots, and pretend that there # is an implicit dot in between a release segment and a pre-release # segment. - split_prospective = _version_split(str(prospective)) + split_prospective = _version_split(normalized_prospective) + + # 0-pad the prospective version before shortening it to get the correct + # shortened version. + padded_prospective, _ = _pad_version(split_prospective, split_spec) # Shorten the prospective version to be the same length as the spec # so that we can determine if the specifier is a prefix of the # prospective version or not. - shortened_prospective = split_prospective[: len(split_spec)] + shortened_prospective = padded_prospective[: len(split_spec)] - # Pad out our two sides with zeros so that they both equal the same - # length. - padded_spec, padded_prospective = _pad_version( - split_spec, shortened_prospective - ) - - return padded_prospective == padded_spec + return shortened_prospective == split_spec else: # Convert our spec string into a Version spec_version = Version(spec) @@ -466,30 +435,24 @@ def _compare_equal(self, prospective: ParsedVersion, spec: str) -> bool: return prospective == spec_version - @_require_version_compare - def _compare_not_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_not_equal(self, prospective: Version, spec: str) -> bool: return not self._compare_equal(prospective, spec) - @_require_version_compare - def _compare_less_than_equal(self, prospective: ParsedVersion, spec: str) -> bool: + def _compare_less_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) <= Version(spec) - @_require_version_compare - def _compare_greater_than_equal( - self, prospective: ParsedVersion, spec: str - ) -> bool: + def _compare_greater_than_equal(self, prospective: Version, spec: str) -> bool: # NB: Local version identifiers are NOT permitted in the version # specifier, so local version labels can be universally removed from # the prospective version. return Version(prospective.public) >= Version(spec) - @_require_version_compare - def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_less_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -514,8 +477,7 @@ def _compare_less_than(self, prospective: ParsedVersion, spec_str: str) -> bool: # version in the spec. return True - @_require_version_compare - def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bool: + def _compare_greater_than(self, prospective: Version, spec_str: str) -> bool: # Convert our spec to a Version instance, since we'll want to work with # it as a version. @@ -549,34 +511,133 @@ def _compare_greater_than(self, prospective: ParsedVersion, spec_str: str) -> bo def _compare_arbitrary(self, prospective: Version, spec: str) -> bool: return str(prospective).lower() == str(spec).lower() - @property - def prereleases(self) -> bool: + def __contains__(self, item: Union[str, Version]) -> bool: + """Return whether or not the item is contained in this specifier. - # If there is an explicit prereleases set for this, then we'll just - # blindly use that. - if self._prereleases is not None: - return self._prereleases + :param item: The item to check for. - # Look at all of our specifiers and determine if they are inclusive - # operators, and if they are if they are including an explicit - # prerelease. - operator, version = self._spec - if operator in ["==", ">=", "<=", "~=", "==="]: - # The == specifier can include a trailing .*, if it does we - # want to remove before parsing. - if operator == "==" and version.endswith(".*"): - version = version[:-2] + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. - # Parse the version, and if it is a pre-release than this - # specifier allows pre-releases. - if parse(version).is_prerelease: - return True + >>> "1.2.3" in Specifier(">=1.2.3") + True + >>> Version("1.2.3") in Specifier(">=1.2.3") + True + >>> "1.0.0" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3") + False + >>> "1.3.0a1" in Specifier(">=1.2.3", prereleases=True) + True + """ + return self.contains(item) - return False + def contains( + self, item: UnparsedVersion, prereleases: Optional[bool] = None + ) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this Specifier. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> Specifier(">=1.2.3").contains("1.2.3") + True + >>> Specifier(">=1.2.3").contains(Version("1.2.3")) + True + >>> Specifier(">=1.2.3").contains("1.0.0") + False + >>> Specifier(">=1.2.3").contains("1.3.0a1") + False + >>> Specifier(">=1.2.3", prereleases=True).contains("1.3.0a1") + True + >>> Specifier(">=1.2.3").contains("1.3.0a1", prereleases=True) + True + """ - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + # Determine if prereleases are to be allowed or not. + if prereleases is None: + prereleases = self.prereleases + + # Normalize item to a Version, this allows us to have a shortcut for + # "2.0" in Specifier(">=2") + normalized_item = _coerce_version(item) + + # Determine if we should be supporting prereleases in this specifier + # or not, if we do not support prereleases than we can short circuit + # logic if this version is a prereleases. + if normalized_item.is_prerelease and not prereleases: + return False + + # Actually do the comparison to determine if this item is contained + # within this Specifier or not. + operator_callable: CallableOperator = self._get_operator(self.operator) + return operator_callable(normalized_item, self.version) + + def filter( + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifier. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(Specifier().contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.2.3", "1.3", Version("1.4")])) + ['1.2.3', '1.3', ] + >>> list(Specifier(">=1.2.3").filter(["1.2", "1.5a1"])) + ['1.5a1'] + >>> list(Specifier(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(Specifier(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + """ + + yielded = False + found_prereleases = [] + + kw = {"prereleases": prereleases if prereleases is not None else True} + + # Attempt to iterate over all the values in the iterable and if any of + # them match, yield them. + for version in iterable: + parsed_version = _coerce_version(version) + + if self.contains(parsed_version, **kw): + # If our version is a prerelease, and we were not set to allow + # prereleases, then we'll store it for later in case nothing + # else matches this specifier. + if parsed_version.is_prerelease and not ( + prereleases or self.prereleases + ): + found_prereleases.append(version) + # Either this is not a prerelease, or we should have been + # accepting prereleases from the beginning. + else: + yielded = True + yield version + + # Now that we've iterated over everything, determine if we've yielded + # any values, and if we have not and we have any prereleases stored up + # then we will go ahead and yield the prereleases. + if not yielded and found_prereleases: + for version in found_prereleases: + yield version _prefix_regex = re.compile(r"^([0-9]+)((?:a|b|c|rc)[0-9]+)$") @@ -618,22 +679,39 @@ def _pad_version(left: List[str], right: List[str]) -> Tuple[List[str], List[str class SpecifierSet(BaseSpecifier): + """This class abstracts handling of a set of version specifiers. + + It can be passed a single specifier (``>=3.0``), a comma-separated list of + specifiers (``>=3.0,!=3.1``), or no specifier at all. + """ + def __init__( self, specifiers: str = "", prereleases: Optional[bool] = None ) -> None: + """Initialize a SpecifierSet instance. + + :param specifiers: + The string representation of a specifier or a comma-separated list of + specifiers which will be parsed and normalized before use. + :param prereleases: + This tells the SpecifierSet if it should accept prerelease versions if + applicable or not. The default of ``None`` will autodetect it from the + given specifiers. + + :raises InvalidSpecifier: + If the given ``specifiers`` are not parseable than this exception will be + raised. + """ - # Split on , to break each individual specifier into it's own item, and + # Split on `,` to break each individual specifier into it's own item, and # strip each item to remove leading/trailing whitespace. split_specifiers = [s.strip() for s in specifiers.split(",") if s.strip()] # Parsed each individual specifier, attempting first to make it a - # Specifier and falling back to a LegacySpecifier. - parsed: Set[_IndividualSpecifier] = set() + # Specifier. + parsed: Set[Specifier] = set() for specifier in split_specifiers: - try: - parsed.add(Specifier(specifier)) - except InvalidSpecifier: - parsed.add(LegacySpecifier(specifier)) + parsed.add(Specifier(specifier)) # Turn our parsed specifiers into a frozen set and save them for later. self._specs = frozenset(parsed) @@ -642,7 +720,40 @@ def __init__( # we accept prereleases or not. self._prereleases = prereleases + @property + def prereleases(self) -> Optional[bool]: + # If we have been given an explicit prerelease modifier, then we'll + # pass that through here. + if self._prereleases is not None: + return self._prereleases + + # If we don't have any specifiers, and we don't have a forced value, + # then we'll just return None since we don't know if this should have + # pre-releases or not. + if not self._specs: + return None + + # Otherwise we'll see if any of the given specifiers accept + # prereleases, if any of them do we'll return True, otherwise False. + return any(s.prereleases for s in self._specs) + + @prereleases.setter + def prereleases(self, value: bool) -> None: + self._prereleases = value + def __repr__(self) -> str: + """A representation of the specifier set that shows all internal state. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> SpecifierSet('>=1.0.0,!=2.0.0') + =1.0.0')> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=False) + =1.0.0', prereleases=False)> + >>> SpecifierSet('>=1.0.0,!=2.0.0', prereleases=True) + =1.0.0', prereleases=True)> + """ pre = ( f", prereleases={self.prereleases!r}" if self._prereleases is not None @@ -652,12 +763,31 @@ def __repr__(self) -> str: return f"" def __str__(self) -> str: + """A string representation of the specifier set that can be round-tripped. + + Note that the ordering of the individual specifiers within the set may not + match the input string. + + >>> str(SpecifierSet(">=1.0.0,!=1.0.1")) + '!=1.0.1,>=1.0.0' + >>> str(SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False)) + '!=1.0.1,>=1.0.0' + """ return ",".join(sorted(str(s) for s in self._specs)) def __hash__(self) -> int: return hash(self._specs) def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": + """Return a SpecifierSet which is a combination of the two sets. + + :param other: The other object to combine with. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") & '<=2.0.0,!=2.0.1' + =1.0.0')> + >>> SpecifierSet(">=1.0.0,!=1.0.1") & SpecifierSet('<=2.0.0,!=2.0.1') + =1.0.0')> + """ if isinstance(other, str): other = SpecifierSet(other) elif not isinstance(other, SpecifierSet): @@ -681,7 +811,25 @@ def __and__(self, other: Union["SpecifierSet", str]) -> "SpecifierSet": return specifier def __eq__(self, other: object) -> bool: - if isinstance(other, (str, _IndividualSpecifier)): + """Whether or not the two SpecifierSet-like objects are equal. + + :param other: The other object to check against. + + The value of :attr:`prereleases` is ignored. + + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> (SpecifierSet(">=1.0.0,!=1.0.1", prereleases=False) == + ... SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True)) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == ">=1.0.0,!=1.0.1" + True + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1") == SpecifierSet(">=1.0.0,!=1.0.2") + False + """ + if isinstance(other, (str, Specifier)): other = SpecifierSet(str(other)) elif not isinstance(other, SpecifierSet): return NotImplemented @@ -689,43 +837,72 @@ def __eq__(self, other: object) -> bool: return self._specs == other._specs def __len__(self) -> int: + """Returns the number of specifiers in this specifier set.""" return len(self._specs) - def __iter__(self) -> Iterator[_IndividualSpecifier]: - return iter(self._specs) - - @property - def prereleases(self) -> Optional[bool]: - - # If we have been given an explicit prerelease modifier, then we'll - # pass that through here. - if self._prereleases is not None: - return self._prereleases - - # If we don't have any specifiers, and we don't have a forced value, - # then we'll just return None since we don't know if this should have - # pre-releases or not. - if not self._specs: - return None - - # Otherwise we'll see if any of the given specifiers accept - # prereleases, if any of them do we'll return True, otherwise False. - return any(s.prereleases for s in self._specs) + def __iter__(self) -> Iterator[Specifier]: + """ + Returns an iterator over all the underlying :class:`Specifier` instances + in this specifier set. - @prereleases.setter - def prereleases(self, value: bool) -> None: - self._prereleases = value + >>> sorted(SpecifierSet(">=1.0.0,!=1.0.1"), key=str) + [, =1.0.0')>] + """ + return iter(self._specs) def __contains__(self, item: UnparsedVersion) -> bool: + """Return whether or not the item is contained in this specifier. + + :param item: The item to check for. + + This is used for the ``in`` operator and behaves the same as + :meth:`contains` with no ``prereleases`` argument passed. + + >>> "1.2.3" in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> Version("1.2.3") in SpecifierSet(">=1.0.0,!=1.0.1") + True + >>> "1.0.1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1") + False + >>> "1.3.0a1" in SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True) + True + """ return self.contains(item) def contains( - self, item: UnparsedVersion, prereleases: Optional[bool] = None + self, + item: UnparsedVersion, + prereleases: Optional[bool] = None, + installed: Optional[bool] = None, ) -> bool: - - # Ensure that our item is a Version or LegacyVersion instance. - if not isinstance(item, (LegacyVersion, Version)): - item = parse(item) + """Return whether or not the item is contained in this SpecifierSet. + + :param item: + The item to check for, which can be a version string or a + :class:`Version` instance. + :param prereleases: + Whether or not to match prereleases with this SpecifierSet. If set to + ``None`` (the default), it uses :attr:`prereleases` to determine + whether or not prereleases are allowed. + + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.2.3") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains(Version("1.2.3")) + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.0.1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1") + False + >>> SpecifierSet(">=1.0.0,!=1.0.1", prereleases=True).contains("1.3.0a1") + True + >>> SpecifierSet(">=1.0.0,!=1.0.1").contains("1.3.0a1", prereleases=True) + True + """ + # Ensure that our item is a Version instance. + if not isinstance(item, Version): + item = Version(item) # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the @@ -742,6 +919,9 @@ def contains( if not prereleases and item.is_prerelease: return False + if installed and item.is_prerelease: + item = Version(item.base_version) + # We simply dispatch to the underlying specs here to make sure that the # given version is contained within all of them. # Note: This use of all() here means that an empty set of specifiers @@ -749,9 +929,46 @@ def contains( return all(s.contains(item, prereleases=prereleases) for s in self._specs) def filter( - self, iterable: Iterable[VersionTypeVar], prereleases: Optional[bool] = None - ) -> Iterable[VersionTypeVar]: - + self, iterable: Iterable[UnparsedVersionVar], prereleases: Optional[bool] = None + ) -> Iterator[UnparsedVersionVar]: + """Filter items in the given iterable, that match the specifiers in this set. + + :param iterable: + An iterable that can contain version strings and :class:`Version` instances. + The items in the iterable will be filtered according to the specifier. + :param prereleases: + Whether or not to allow prereleases in the returned iterator. If set to + ``None`` (the default), it will be intelligently decide whether to allow + prereleases or not (based on the :attr:`prereleases` attribute, and + whether the only versions matching are prereleases). + + This method is smarter than just ``filter(SpecifierSet(...).contains, [...])`` + because it implements the rule from :pep:`440` that a prerelease item + SHOULD be accepted if no other versions match the given specifier. + + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.3", Version("1.4")])) + ['1.3', ] + >>> list(SpecifierSet(">=1.2.3").filter(["1.2", "1.5a1"])) + [] + >>> list(SpecifierSet(">=1.2.3").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + >>> list(SpecifierSet(">=1.2.3", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + + An "empty" SpecifierSet will filter items based on the presence of prerelease + versions in the set. + + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"])) + ['1.3'] + >>> list(SpecifierSet("").filter(["1.5a1"])) + ['1.5a1'] + >>> list(SpecifierSet("", prereleases=True).filter(["1.3", "1.5a1"])) + ['1.3', '1.5a1'] + >>> list(SpecifierSet("").filter(["1.3", "1.5a1"], prereleases=True)) + ['1.3', '1.5a1'] + """ # Determine if we're forcing a prerelease or not, if we're not forcing # one for this particular filter call, then we'll use whatever the # SpecifierSet thinks for whether or not we should support prereleases. @@ -764,27 +981,16 @@ def filter( if self._specs: for spec in self._specs: iterable = spec.filter(iterable, prereleases=bool(prereleases)) - return iterable + return iter(iterable) # If we do not have any specifiers, then we need to have a rough filter # which will filter out any pre-releases, unless there are no final - # releases, and which will filter out LegacyVersion in general. + # releases. else: - filtered: List[VersionTypeVar] = [] - found_prereleases: List[VersionTypeVar] = [] - - item: UnparsedVersion - parsed_version: Union[Version, LegacyVersion] + filtered: List[UnparsedVersionVar] = [] + found_prereleases: List[UnparsedVersionVar] = [] for item in iterable: - # Ensure that we some kind of Version class for this item. - if not isinstance(item, (LegacyVersion, Version)): - parsed_version = parse(item) - else: - parsed_version = item - - # Filter out any item which is parsed as a LegacyVersion - if isinstance(parsed_version, LegacyVersion): - continue + parsed_version = _coerce_version(item) # Store any item which is a pre-release for later unless we've # already found a final version or we are accepting prereleases @@ -797,6 +1003,6 @@ def filter( # If we've found no items except for pre-releases, then we'll go # ahead and use the pre-releases if not filtered and found_prereleases and prereleases is None: - return found_prereleases + return iter(found_prereleases) - return filtered + return iter(filtered) diff --git a/src/pip/_vendor/packaging/tags.py b/src/pip/_vendor/packaging/tags.py index 9a3d25a71c7..37f33b1ef84 100644 --- a/src/pip/_vendor/packaging/tags.py +++ b/src/pip/_vendor/packaging/tags.py @@ -4,6 +4,8 @@ import logging import platform +import struct +import subprocess import sys import sysconfig from importlib.machinery import EXTENSION_SUFFIXES @@ -36,7 +38,7 @@ } -_32_BIT_INTERPRETER = sys.maxsize <= 2 ** 32 +_32_BIT_INTERPRETER = struct.calcsize("P") == 4 class Tag: @@ -110,7 +112,7 @@ def parse_tag(tag: str) -> FrozenSet[Tag]: def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: - value = sysconfig.get_config_var(name) + value: Union[int, str, None] = sysconfig.get_config_var(name) if value is None and warn: logger.debug( "Config variable '%s' is unset, Python ABI tag may be incorrect", name @@ -119,7 +121,7 @@ def _get_config_var(name: str, warn: bool = False) -> Union[int, str, None]: def _normalize_string(string: str) -> str: - return string.replace(".", "_").replace("-", "_") + return string.replace(".", "_").replace("-", "_").replace(" ", "_") def _abi3_applies(python_version: PythonVersion) -> bool: @@ -224,10 +226,45 @@ def cpython_tags( yield Tag(interpreter, "abi3", platform_) -def _generic_abi() -> Iterator[str]: - abi = sysconfig.get_config_var("SOABI") - if abi: - yield _normalize_string(abi) +def _generic_abi() -> List[str]: + """ + Return the ABI tag based on EXT_SUFFIX. + """ + # The following are examples of `EXT_SUFFIX`. + # We want to keep the parts which are related to the ABI and remove the + # parts which are related to the platform: + # - linux: '.cpython-310-x86_64-linux-gnu.so' => cp310 + # - mac: '.cpython-310-darwin.so' => cp310 + # - win: '.cp310-win_amd64.pyd' => cp310 + # - win: '.pyd' => cp37 (uses _cpython_abis()) + # - pypy: '.pypy38-pp73-x86_64-linux-gnu.so' => pypy38_pp73 + # - graalpy: '.graalpy-38-native-x86_64-darwin.dylib' + # => graalpy_38_native + + ext_suffix = _get_config_var("EXT_SUFFIX", warn=True) + if not isinstance(ext_suffix, str) or ext_suffix[0] != ".": + raise SystemError("invalid sysconfig.get_config_var('EXT_SUFFIX')") + parts = ext_suffix.split(".") + if len(parts) < 3: + # CPython3.7 and earlier uses ".pyd" on Windows. + return _cpython_abis(sys.version_info[:2]) + soabi = parts[1] + if soabi.startswith("cpython"): + # non-windows + abi = "cp" + soabi.split("-")[1] + elif soabi.startswith("cp"): + # windows + abi = soabi.split("-")[0] + elif soabi.startswith("pypy"): + abi = "-".join(soabi.split("-")[:2]) + elif soabi.startswith("graalpy"): + abi = "-".join(soabi.split("-")[:3]) + elif soabi: + # pyston, ironpython, others? + abi = soabi + else: + return [] + return [_normalize_string(abi)] def generic_tags( @@ -251,8 +288,9 @@ def generic_tags( interpreter = "".join([interp_name, interp_version]) if abis is None: abis = _generic_abi() + else: + abis = list(abis) platforms = list(platforms or platform_tags()) - abis = list(abis) if "none" not in abis: abis.append("none") for abi in abis: @@ -356,6 +394,22 @@ def mac_platforms( version_str, _, cpu_arch = platform.mac_ver() if version is None: version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) + if version == (10, 16): + # When built against an older macOS SDK, Python will report macOS 10.16 + # instead of the real version. + version_str = subprocess.run( + [ + sys.executable, + "-sS", + "-c", + "import platform; print(platform.mac_ver()[0])", + ], + check=True, + env={"SYSTEM_VERSION_COMPAT": "0"}, + stdout=subprocess.PIPE, + text=True, + ).stdout + version = cast("MacVersion", tuple(map(int, version_str.split(".")[:2]))) else: version = version if arch is None: @@ -416,15 +470,21 @@ def mac_platforms( def _linux_platforms(is_32bit: bool = _32_BIT_INTERPRETER) -> Iterator[str]: linux = _normalize_string(sysconfig.get_platform()) + if not linux.startswith("linux_"): + # we should never be here, just yield the sysconfig one and return + yield linux + return if is_32bit: if linux == "linux_x86_64": linux = "linux_i686" elif linux == "linux_aarch64": - linux = "linux_armv7l" + linux = "linux_armv8l" _, arch = linux.split("_", 1) - yield from _manylinux.platform_tags(linux, arch) - yield from _musllinux.platform_tags(arch) - yield linux + archs = {"armv8l": ["armv8l", "armv7l"]}.get(arch, [arch]) + yield from _manylinux.platform_tags(archs) + yield from _musllinux.platform_tags(archs) + for arch in archs: + yield f"linux_{arch}" def _generic_platforms() -> Iterator[str]: @@ -446,6 +506,9 @@ def platform_tags() -> Iterator[str]: def interpreter_name() -> str: """ Returns the name of the running interpreter. + + Some implementations have a reserved, two-letter abbreviation which will + be returned when appropriate. """ name = sys.implementation.name return INTERPRETER_SHORT_NAMES.get(name) or name @@ -482,6 +545,9 @@ def sys_tags(*, warn: bool = False) -> Iterator[Tag]: yield from generic_tags() if interp_name == "pp": - yield from compatible_tags(interpreter="pp3") + interp = "pp3" + elif interp_name == "cp": + interp = "cp" + interpreter_version(warn=warn) else: - yield from compatible_tags() + interp = None + yield from compatible_tags(interpreter=interp) diff --git a/src/pip/_vendor/packaging/utils.py b/src/pip/_vendor/packaging/utils.py index bab11b80c60..c2c2f75aa80 100644 --- a/src/pip/_vendor/packaging/utils.py +++ b/src/pip/_vendor/packaging/utils.py @@ -12,6 +12,12 @@ NormalizedName = NewType("NormalizedName", str) +class InvalidName(ValueError): + """ + An invalid distribution name; users should refer to the packaging user guide. + """ + + class InvalidWheelFilename(ValueError): """ An invalid wheel filename was found, users should refer to PEP 427. @@ -24,18 +30,31 @@ class InvalidSdistFilename(ValueError): """ +# Core metadata spec for `Name` +_validate_regex = re.compile( + r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE +) _canonicalize_regex = re.compile(r"[-_.]+") +_normalized_regex = re.compile(r"^([a-z0-9]|[a-z0-9]([a-z0-9-](?!--))*[a-z0-9])$") # PEP 427: The build number must start with a digit. _build_tag_regex = re.compile(r"(\d+)(.*)") -def canonicalize_name(name: str) -> NormalizedName: +def canonicalize_name(name: str, *, validate: bool = False) -> NormalizedName: + if validate and not _validate_regex.match(name): + raise InvalidName(f"name is invalid: {name!r}") # This is taken from PEP 503. value = _canonicalize_regex.sub("-", name).lower() return cast(NormalizedName, value) -def canonicalize_version(version: Union[Version, str]) -> str: +def is_normalized_name(name: str) -> bool: + return _normalized_regex.match(name) is not None + + +def canonicalize_version( + version: Union[Version, str], *, strip_trailing_zero: bool = True +) -> str: """ This is very similar to Version.__str__, but has one subtle difference with the way it handles the release segment. @@ -56,8 +75,11 @@ def canonicalize_version(version: Union[Version, str]) -> str: parts.append(f"{parsed.epoch}!") # Release segment - # NB: This strips trailing '.0's to normalize - parts.append(re.sub(r"(\.0)+$", "", ".".join(str(x) for x in parsed.release))) + release_segment = ".".join(str(x) for x in parsed.release) + if strip_trailing_zero: + # NB: This strips trailing '.0's to normalize + release_segment = re.sub(r"(\.0)+$", "", release_segment) + parts.append(release_segment) # Pre-release if parsed.pre is not None: @@ -95,11 +117,18 @@ def parse_wheel_filename( parts = filename.split("-", dashes - 2) name_part = parts[0] - # See PEP 427 for the rules on escaping the project name + # See PEP 427 for the rules on escaping the project name. if "__" in name_part or re.match(r"^[\w\d._]*$", name_part, re.UNICODE) is None: raise InvalidWheelFilename(f"Invalid project name: {filename}") name = canonicalize_name(name_part) - version = Version(parts[1]) + + try: + version = Version(parts[1]) + except InvalidVersion as e: + raise InvalidWheelFilename( + f"Invalid wheel filename (invalid version): {filename}" + ) from e + if dashes == 5: build_part = parts[2] build_match = _build_tag_regex.match(build_part) @@ -132,5 +161,12 @@ def parse_sdist_filename(filename: str) -> Tuple[NormalizedName, Version]: raise InvalidSdistFilename(f"Invalid sdist filename: {filename}") name = canonicalize_name(name_part) - version = Version(version_part) + + try: + version = Version(version_part) + except InvalidVersion as e: + raise InvalidSdistFilename( + f"Invalid sdist filename (invalid version): {filename}" + ) from e + return (name, version) diff --git a/src/pip/_vendor/packaging/version.py b/src/pip/_vendor/packaging/version.py index de9a09a4ed3..6978e2843fa 100644 --- a/src/pip/_vendor/packaging/version.py +++ b/src/pip/_vendor/packaging/version.py @@ -1,64 +1,71 @@ # This file is dual licensed under the terms of the Apache License, Version # 2.0, and the BSD License. See the LICENSE file in the root of this repository # for complete details. +""" +.. testsetup:: + + from pip._vendor.packaging.version import parse, Version +""" -import collections import itertools import re -import warnings -from typing import Callable, Iterator, List, Optional, SupportsInt, Tuple, Union +from typing import Any, Callable, NamedTuple, Optional, SupportsInt, Tuple, Union from ._structures import Infinity, InfinityType, NegativeInfinity, NegativeInfinityType -__all__ = ["parse", "Version", "LegacyVersion", "InvalidVersion", "VERSION_PATTERN"] +__all__ = ["VERSION_PATTERN", "parse", "Version", "InvalidVersion"] -InfiniteTypes = Union[InfinityType, NegativeInfinityType] -PrePostDevType = Union[InfiniteTypes, Tuple[str, int]] -SubLocalType = Union[InfiniteTypes, int, str] -LocalType = Union[ +LocalType = Tuple[Union[int, str], ...] + +CmpPrePostDevType = Union[InfinityType, NegativeInfinityType, Tuple[str, int]] +CmpLocalType = Union[ NegativeInfinityType, - Tuple[ - Union[ - SubLocalType, - Tuple[SubLocalType, str], - Tuple[NegativeInfinityType, SubLocalType], - ], - ..., - ], + Tuple[Union[Tuple[int, str], Tuple[NegativeInfinityType, Union[int, str]]], ...], ] CmpKey = Tuple[ - int, Tuple[int, ...], PrePostDevType, PrePostDevType, PrePostDevType, LocalType -] -LegacyCmpKey = Tuple[int, Tuple[str, ...]] -VersionComparisonMethod = Callable[ - [Union[CmpKey, LegacyCmpKey], Union[CmpKey, LegacyCmpKey]], bool + int, + Tuple[int, ...], + CmpPrePostDevType, + CmpPrePostDevType, + CmpPrePostDevType, + CmpLocalType, ] +VersionComparisonMethod = Callable[[CmpKey, CmpKey], bool] -_Version = collections.namedtuple( - "_Version", ["epoch", "release", "dev", "pre", "post", "local"] -) +class _Version(NamedTuple): + epoch: int + release: Tuple[int, ...] + dev: Optional[Tuple[str, int]] + pre: Optional[Tuple[str, int]] + post: Optional[Tuple[str, int]] + local: Optional[LocalType] -def parse(version: str) -> Union["LegacyVersion", "Version"]: - """ - Parse the given version string and return either a :class:`Version` object - or a :class:`LegacyVersion` object depending on if the given version is - a valid PEP 440 version or a legacy version. + +def parse(version: str) -> "Version": + """Parse the given version string. + + >>> parse('1.0.dev1') + + + :param version: The version string to parse. + :raises InvalidVersion: When the version string is not a valid version. """ - try: - return Version(version) - except InvalidVersion: - return LegacyVersion(version) + return Version(version) class InvalidVersion(ValueError): - """ - An invalid version was found, users should refer to PEP 440. + """Raised when a version string is not a valid version. + + >>> Version("invalid") + Traceback (most recent call last): + ... + packaging.version.InvalidVersion: Invalid version: 'invalid' """ class _BaseVersion: - _key: Union[CmpKey, LegacyCmpKey] + _key: Tuple[Any, ...] def __hash__(self) -> int: return hash(self._key) @@ -103,133 +110,16 @@ def __ne__(self, other: object) -> bool: return self._key != other._key -class LegacyVersion(_BaseVersion): - def __init__(self, version: str) -> None: - self._version = str(version) - self._key = _legacy_cmpkey(self._version) - - warnings.warn( - "Creating a LegacyVersion has been deprecated and will be " - "removed in the next major release", - DeprecationWarning, - ) - - def __str__(self) -> str: - return self._version - - def __repr__(self) -> str: - return f"" - - @property - def public(self) -> str: - return self._version - - @property - def base_version(self) -> str: - return self._version - - @property - def epoch(self) -> int: - return -1 - - @property - def release(self) -> None: - return None - - @property - def pre(self) -> None: - return None - - @property - def post(self) -> None: - return None - - @property - def dev(self) -> None: - return None - - @property - def local(self) -> None: - return None - - @property - def is_prerelease(self) -> bool: - return False - - @property - def is_postrelease(self) -> bool: - return False - - @property - def is_devrelease(self) -> bool: - return False - - -_legacy_version_component_re = re.compile(r"(\d+ | [a-z]+ | \.| -)", re.VERBOSE) - -_legacy_version_replacement_map = { - "pre": "c", - "preview": "c", - "-": "final-", - "rc": "c", - "dev": "@", -} - - -def _parse_version_parts(s: str) -> Iterator[str]: - for part in _legacy_version_component_re.split(s): - part = _legacy_version_replacement_map.get(part, part) - - if not part or part == ".": - continue - - if part[:1] in "0123456789": - # pad for numeric comparison - yield part.zfill(8) - else: - yield "*" + part - - # ensure that alpha/beta/candidate are before final - yield "*final" - - -def _legacy_cmpkey(version: str) -> LegacyCmpKey: - - # We hardcode an epoch of -1 here. A PEP 440 version can only have a epoch - # greater than or equal to 0. This will effectively put the LegacyVersion, - # which uses the defacto standard originally implemented by setuptools, - # as before all PEP 440 versions. - epoch = -1 - - # This scheme is taken from pkg_resources.parse_version setuptools prior to - # it's adoption of the packaging library. - parts: List[str] = [] - for part in _parse_version_parts(version.lower()): - if part.startswith("*"): - # remove "-" before a prerelease tag - if part < "*final": - while parts and parts[-1] == "*final-": - parts.pop() - - # remove trailing zeros from each series of numeric parts - while parts and parts[-1] == "00000000": - parts.pop() - - parts.append(part) - - return epoch, tuple(parts) - - # Deliberately not anchored to the start and end of the string, to make it # easier for 3rd party code to reuse -VERSION_PATTERN = r""" +_VERSION_PATTERN = r""" v? (?: (?:(?P[0-9]+)!)? # epoch (?P[0-9]+(?:\.[0-9]+)*) # release segment (?P
                                          # pre-release
             [-_\.]?
-            (?P(a|b|c|rc|alpha|beta|pre|preview))
+            (?Palpha|a|beta|b|preview|pre|c|rc)
             [-_\.]?
             (?P[0-9]+)?
         )?
@@ -253,12 +143,56 @@ def _legacy_cmpkey(version: str) -> LegacyCmpKey:
     (?:\+(?P[a-z0-9]+(?:[-_\.][a-z0-9]+)*))?       # local version
 """
 
+VERSION_PATTERN = _VERSION_PATTERN
+"""
+A string containing the regular expression used to match a valid version.
+
+The pattern is not anchored at either end, and is intended for embedding in larger
+expressions (for example, matching a version number as part of a file name). The
+regular expression should be compiled with the ``re.VERBOSE`` and ``re.IGNORECASE``
+flags set.
+
+:meta hide-value:
+"""
+
 
 class Version(_BaseVersion):
+    """This class abstracts handling of a project's versions.
+
+    A :class:`Version` instance is comparison aware and can be compared and
+    sorted using the standard Python interfaces.
+
+    >>> v1 = Version("1.0a5")
+    >>> v2 = Version("1.0")
+    >>> v1
+    
+    >>> v2
+    
+    >>> v1 < v2
+    True
+    >>> v1 == v2
+    False
+    >>> v1 > v2
+    False
+    >>> v1 >= v2
+    False
+    >>> v1 <= v2
+    True
+    """
 
     _regex = re.compile(r"^\s*" + VERSION_PATTERN + r"\s*$", re.VERBOSE | re.IGNORECASE)
+    _key: CmpKey
 
     def __init__(self, version: str) -> None:
+        """Initialize a Version object.
+
+        :param version:
+            The string representation of a version which will be parsed and normalized
+            before use.
+        :raises InvalidVersion:
+            If the ``version`` does not conform to PEP 440 in any way then this
+            exception will be raised.
+        """
 
         # Validate the version and parse it into pieces
         match = self._regex.search(version)
@@ -288,9 +222,19 @@ def __init__(self, version: str) -> None:
         )
 
     def __repr__(self) -> str:
+        """A representation of the Version that shows all internal state.
+
+        >>> Version('1.0.0')
+        
+        """
         return f""
 
     def __str__(self) -> str:
+        """A string representation of the version that can be rounded-tripped.
+
+        >>> str(Version("1.0a5"))
+        '1.0a5'
+        """
         parts = []
 
         # Epoch
@@ -320,29 +264,77 @@ def __str__(self) -> str:
 
     @property
     def epoch(self) -> int:
-        _epoch: int = self._version.epoch
-        return _epoch
+        """The epoch of the version.
+
+        >>> Version("2.0.0").epoch
+        0
+        >>> Version("1!2.0.0").epoch
+        1
+        """
+        return self._version.epoch
 
     @property
     def release(self) -> Tuple[int, ...]:
-        _release: Tuple[int, ...] = self._version.release
-        return _release
+        """The components of the "release" segment of the version.
+
+        >>> Version("1.2.3").release
+        (1, 2, 3)
+        >>> Version("2.0.0").release
+        (2, 0, 0)
+        >>> Version("1!2.0.0.post0").release
+        (2, 0, 0)
+
+        Includes trailing zeroes but not the epoch or any pre-release / development /
+        post-release suffixes.
+        """
+        return self._version.release
 
     @property
     def pre(self) -> Optional[Tuple[str, int]]:
-        _pre: Optional[Tuple[str, int]] = self._version.pre
-        return _pre
+        """The pre-release segment of the version.
+
+        >>> print(Version("1.2.3").pre)
+        None
+        >>> Version("1.2.3a1").pre
+        ('a', 1)
+        >>> Version("1.2.3b1").pre
+        ('b', 1)
+        >>> Version("1.2.3rc1").pre
+        ('rc', 1)
+        """
+        return self._version.pre
 
     @property
     def post(self) -> Optional[int]:
+        """The post-release number of the version.
+
+        >>> print(Version("1.2.3").post)
+        None
+        >>> Version("1.2.3.post1").post
+        1
+        """
         return self._version.post[1] if self._version.post else None
 
     @property
     def dev(self) -> Optional[int]:
+        """The development number of the version.
+
+        >>> print(Version("1.2.3").dev)
+        None
+        >>> Version("1.2.3.dev1").dev
+        1
+        """
         return self._version.dev[1] if self._version.dev else None
 
     @property
     def local(self) -> Optional[str]:
+        """The local version segment of the version.
+
+        >>> print(Version("1.2.3").local)
+        None
+        >>> Version("1.2.3+abc").local
+        'abc'
+        """
         if self._version.local:
             return ".".join(str(x) for x in self._version.local)
         else:
@@ -350,10 +342,31 @@ def local(self) -> Optional[str]:
 
     @property
     def public(self) -> str:
+        """The public portion of the version.
+
+        >>> Version("1.2.3").public
+        '1.2.3'
+        >>> Version("1.2.3+abc").public
+        '1.2.3'
+        >>> Version("1.2.3+abc.dev1").public
+        '1.2.3'
+        """
         return str(self).split("+", 1)[0]
 
     @property
     def base_version(self) -> str:
+        """The "base version" of the version.
+
+        >>> Version("1.2.3").base_version
+        '1.2.3'
+        >>> Version("1.2.3+abc").base_version
+        '1.2.3'
+        >>> Version("1!1.2.3+abc.dev1").base_version
+        '1!1.2.3'
+
+        The "base version" is the public version of the project without any pre or post
+        release markers.
+        """
         parts = []
 
         # Epoch
@@ -367,31 +380,77 @@ def base_version(self) -> str:
 
     @property
     def is_prerelease(self) -> bool:
+        """Whether this version is a pre-release.
+
+        >>> Version("1.2.3").is_prerelease
+        False
+        >>> Version("1.2.3a1").is_prerelease
+        True
+        >>> Version("1.2.3b1").is_prerelease
+        True
+        >>> Version("1.2.3rc1").is_prerelease
+        True
+        >>> Version("1.2.3dev1").is_prerelease
+        True
+        """
         return self.dev is not None or self.pre is not None
 
     @property
     def is_postrelease(self) -> bool:
+        """Whether this version is a post-release.
+
+        >>> Version("1.2.3").is_postrelease
+        False
+        >>> Version("1.2.3.post1").is_postrelease
+        True
+        """
         return self.post is not None
 
     @property
     def is_devrelease(self) -> bool:
+        """Whether this version is a development release.
+
+        >>> Version("1.2.3").is_devrelease
+        False
+        >>> Version("1.2.3.dev1").is_devrelease
+        True
+        """
         return self.dev is not None
 
     @property
     def major(self) -> int:
+        """The first item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").major
+        1
+        """
         return self.release[0] if len(self.release) >= 1 else 0
 
     @property
     def minor(self) -> int:
+        """The second item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").minor
+        2
+        >>> Version("1").minor
+        0
+        """
         return self.release[1] if len(self.release) >= 2 else 0
 
     @property
     def micro(self) -> int:
+        """The third item of :attr:`release` or ``0`` if unavailable.
+
+        >>> Version("1.2.3").micro
+        3
+        >>> Version("1").micro
+        0
+        """
         return self.release[2] if len(self.release) >= 3 else 0
 
 
 def _parse_letter_version(
-    letter: str, number: Union[str, bytes, SupportsInt]
+    letter: Optional[str], number: Union[str, bytes, SupportsInt, None]
 ) -> Optional[Tuple[str, int]]:
 
     if letter:
@@ -429,7 +488,7 @@ def _parse_letter_version(
 _local_version_separators = re.compile(r"[\._-]")
 
 
-def _parse_local_version(local: str) -> Optional[LocalType]:
+def _parse_local_version(local: Optional[str]) -> Optional[LocalType]:
     """
     Takes a string like abc.1.twelve and turns it into ("abc", 1, "twelve").
     """
@@ -447,7 +506,7 @@ def _cmpkey(
     pre: Optional[Tuple[str, int]],
     post: Optional[Tuple[str, int]],
     dev: Optional[Tuple[str, int]],
-    local: Optional[Tuple[SubLocalType]],
+    local: Optional[LocalType],
 ) -> CmpKey:
 
     # When we compare a release version, we want to compare it with all of the
@@ -464,7 +523,7 @@ def _cmpkey(
     # if there is not a pre or a post segment. If we have one of those then
     # the normal sorting rules will handle this case correctly.
     if pre is None and post is None and dev is not None:
-        _pre: PrePostDevType = NegativeInfinity
+        _pre: CmpPrePostDevType = NegativeInfinity
     # Versions without a pre-release (except as noted above) should sort after
     # those with one.
     elif pre is None:
@@ -474,21 +533,21 @@ def _cmpkey(
 
     # Versions without a post segment should sort before those with one.
     if post is None:
-        _post: PrePostDevType = NegativeInfinity
+        _post: CmpPrePostDevType = NegativeInfinity
 
     else:
         _post = post
 
     # Versions without a development segment should sort after those with one.
     if dev is None:
-        _dev: PrePostDevType = Infinity
+        _dev: CmpPrePostDevType = Infinity
 
     else:
         _dev = dev
 
     if local is None:
         # Versions without a local segment should sort before those with one.
-        _local: LocalType = NegativeInfinity
+        _local: CmpLocalType = NegativeInfinity
     else:
         # Versions with a local segment need that segment parsed to implement
         # the sorting rules in PEP440.
diff --git a/src/pip/_vendor/vendor.txt b/src/pip/_vendor/vendor.txt
index e9694adc037..c892f8d4ad0 100644
--- a/src/pip/_vendor/vendor.txt
+++ b/src/pip/_vendor/vendor.txt
@@ -3,7 +3,7 @@ colorama==0.4.6
 distlib==0.3.8
 distro==1.9.0
 msgpack==1.0.8
-packaging==21.3
+packaging==23.2
 platformdirs==4.2.1
 pyparsing==3.1.2
 pyproject-hooks==1.0.0