From 76aff708b47c127298f817a54b3817394183d635 Mon Sep 17 00:00:00 2001 From: M Bussonnier Date: Thu, 24 Oct 2024 10:05:17 +0200 Subject: [PATCH] Transform relative index url into an absolute one, and use 'PYPI' as an alias for the PyPI index. Basically some indexes (like pypi.anaconda.org) use relative urls for wheels, thus we need to resolve those before calling `wheel.download()` or it tries to download relative to the current page. With this: https://github.com/carreau/cors running on localhost 8787 Manually install micropip: js> pyodide.loadPackage('https://files.pythonhosted.org/packages/08/aa/cc0199a5f0ad350994d660967a8efb233fe0416e4639146c089643407ce6/packaging-24.1-py3-none-any.whl'); pyodide.loadPackage('http://localhost:8088/micropip-0.6.2.dev3+g90f4991.d20241024-py3-none-any.whl') And >>> import logging ... log = logging.getLogger('micropip') ... log.setLevel(10) ... import micropip ... await micropip.install('ipython', index_urls=['http://localhost:8787/scientific-python-nightly-wheels/simple', 'PYPI']) I get a proper install of IPython nightly from an anaconda repo: >>> import micropip >>> await micropip.install('ipython', index_urls=['http://localhost:8787/scientific-python-nightly-wheels/simple', 'PYPI'], verbose=True) Collecting ipython Downloading ipython-8.29.0.dev0-py3-none-any.whl Collecting traitlets>=5.13.0 Downloading traitlets-5.14.3-py3-none-any.whl Collecting stack-data Downloading stack_data-0.6.3-py3-none-any.whl Collecting matplotlib-inline Downloading matplotlib_inline-0.1.7-py3-none-any.whl Collecting prompt-toolkit<3.1.0,>=3.0.41 Downloading prompt_toolkit-3.0.48-py3-none-any.whl Collecting decorator Downloading decorator-5.1.1-py3-none-any.whl Requirement already satisfied: traitlets (5.14.3) Collecting jedi>=0.16 Downloading jedi-0.19.1-py2.py3-none-any.whl Collecting pygments>=2.4.0 Downloading pygments-2.18.0-py3-none-any.whl Collecting asttokens>=2.1.0 Downloading asttokens-2.4.1-py2.py3-none-any.whl Collecting executing>=1.2.0 
Downloading executing-2.1.0-py2.py3-none-any.whl Collecting pure-eval Downloading pure_eval-0.2.3-py3-none-any.whl Collecting wcwidth Downloading wcwidth-0.2.13-py2.py3-none-any.whl Collecting parso<0.9.0,>=0.8.3 Downloading parso-0.8.4-py2.py3-none-any.whl Collecting six>=1.12.0 Downloading six-1.16.0-py2.py3-none-any.whl Installing collected packages: traitlets, decorator, matplotlib-inline, pygments, executing, pure-eval, wcwidth, prompt-toolkit, parso, jedi, six, asttokens, stack-data, ipython Successfully installed traitlets-5.14.3, decorator-5.1.1, matplotlib-inline-0.1.7, pygments-2.18.0, executing-2.1.0, pure-eval-0.2.3, wcwidth-0.2.13, prompt-toolkit-3.0.48, parso-0.8.4, jedi-0.19.1, six-1.16.0, asttokens-2.4.1, stack-data-0.6.3, ipython-8.29.0.dev0 >>> (Well IPython does not import because no SQLITE3... but it's progress.) --- micropip/package_index.py | 106 +++++++++++++++++++++++--------------- micropip/transaction.py | 3 +- micropip/wheelinfo.py | 11 +++- 3 files changed, 75 insertions(+), 45 deletions(-) diff --git a/micropip/package_index.py b/micropip/package_index.py index f7cf02a..cd3dcfb 100644 --- a/micropip/package_index.py +++ b/micropip/package_index.py @@ -7,6 +7,7 @@ from dataclasses import dataclass from functools import partial from typing import Any +from urllib.parse import urlparse, urlunparse from packaging.utils import InvalidWheelFilename from packaging.version import InvalidVersion, Version @@ -16,7 +17,8 @@ from .externals.mousebender.simple import from_project_details_html from .wheelinfo import WheelInfo -DEFAULT_INDEX_URLS = ["https://pypi.org/simple"] +PYPI_URL = "https://pypi.org/simple" +DEFAULT_INDEX_URLS = [PYPI_URL] INDEX_URLS = DEFAULT_INDEX_URLS _formatter = string.Formatter() @@ -40,7 +42,9 @@ class ProjectInfo: releases: dict[Version, Generator[WheelInfo, None, None]] @staticmethod - def from_json_api(data: str | bytes | dict[str, Any]) -> "ProjectInfo": + def from_json_api( + data: str | bytes | dict[str, Any], 
index_base_url: str + ) -> "ProjectInfo": """ Parse JSON API response @@ -68,7 +72,9 @@ def from_json_api(data: str | bytes | dict[str, Any]) -> "ProjectInfo": return ProjectInfo._compatible_only(name, releases) @staticmethod - def from_simple_json_api(data: str | bytes | dict[str, Any]) -> "ProjectInfo": + def from_simple_json_api( + data: str | bytes | dict[str, Any], index_base_url: str + ) -> "ProjectInfo": """ Parse Simple JSON API response @@ -76,23 +82,25 @@ def from_simple_json_api(data: str | bytes | dict[str, Any]) -> "ProjectInfo": """ data_dict = json.loads(data) if isinstance(data, str | bytes) else data - name, releases = ProjectInfo._parse_pep691_response(data_dict) + name, releases = ProjectInfo._parse_pep691_response(data_dict, index_base_url) return ProjectInfo._compatible_only(name, releases) @staticmethod - def from_simple_html_api(data: str, pkgname: str) -> "ProjectInfo": + def from_simple_html_api( + data: str, pkgname: str, index_base_url: str + ) -> "ProjectInfo": """ Parse Simple HTML API response https://peps.python.org/pep-0503 """ project_detail = from_project_details_html(data, pkgname) - name, releases = ProjectInfo._parse_pep691_response(project_detail) # type: ignore[arg-type] + name, releases = ProjectInfo._parse_pep691_response(project_detail, index_base_url) # type: ignore[arg-type] return ProjectInfo._compatible_only(name, releases) @staticmethod def _parse_pep691_response( - resp: dict[str, Any] + resp: dict[str, Any], index_base_url: str ) -> tuple[str, dict[Version, list[Any]]]: name = resp["name"] @@ -123,11 +131,42 @@ def _parse_pep691_response( version = parse_version(filename) except (InvalidVersion, InvalidWheelFilename): continue + if file["url"].startswith("/"): + file["url"] = index_base_url + file["url"] releases[version].append(file) return name, releases + @classmethod + def _compatible_wheels( + cls, files: list[dict[str, Any]], version: Version, name: str + ) -> Generator[WheelInfo, None, None]: + for file in 
files: + filename = file["filename"] + + # Checking compatibility takes a bit of time, + # so we use a generator to avoid doing it for all files. + compatible = is_package_compatible(filename) + if not compatible: + continue + + # JSON API has a "digests" key, while Simple API has a "hashes" key. + hashes = file["digests"] if "digests" in file else file["hashes"] + sha256 = hashes.get("sha256") + + # Size of the file in bytes, if available (PEP 700) + # This key is not available in the Simple API HTML response, so this field may be None + size = file.get("size") + yield WheelInfo.from_package_index( + name=name, + filename=filename, + url=file["url"], + version=version, + sha256=sha256, + size=size, + ) + @classmethod def _compatible_only( cls, name: str, releases: dict[Version, list[dict[str, Any]]] @@ -137,37 +176,8 @@ def _compatible_only( Checking compatibility takes a bit of time, so we use a generator to avoid doing it if not needed. """ - def _compatible_wheels( - files: list[dict[str, Any]], version: Version - ) -> Generator[WheelInfo, None, None]: - for file in files: - filename = file["filename"] - - # Checking compatibility takes a bit of time, - # so we use a generator to avoid doing it for all files. - compatible = is_package_compatible(filename) - if not compatible: - continue - - # JSON API has a "digests" key, while Simple API has a "hashes" key. 
- hashes = file["digests"] if "digests" in file else file["hashes"] - sha256 = hashes.get("sha256") - - # Size of the file in bytes, if available (PEP 700) - # This key is not available in the Simple API HTML response, so this field may be None - size = file.get("size") - - yield WheelInfo.from_package_index( - name=name, - filename=filename, - url=file["url"], - version=version, - sha256=sha256, - size=size, - ) - releases_compatible = { - version: _compatible_wheels(files, version) + version: cls._compatible_wheels(files, version, name=name) for version, files in releases.items() } @@ -218,21 +228,29 @@ def _contain_placeholder(url: str, placeholder: str = "package_name") -> bool: return placeholder in fields -def _select_parser(content_type: str, pkgname: str) -> Callable[[str], ProjectInfo]: +def _select_parser( + content_type: str, pkgname: str, index_base_url: str +) -> Callable[[str], ProjectInfo]: """ Select the function to parse the response based on the content type. """ match content_type: case "application/vnd.pypi.simple.v1+json": - return ProjectInfo.from_simple_json_api + return partial( + ProjectInfo.from_simple_json_api, index_base_url=index_base_url + ) case "application/json": - return ProjectInfo.from_json_api + return partial(ProjectInfo.from_json_api, index_base_url=index_base_url) case ( "application/vnd.pypi.simple.v1+html" | "text/html" | "text/html; charset=utf-8" ): - return partial(ProjectInfo.from_simple_html_api, pkgname=pkgname) + return partial( + ProjectInfo.from_simple_html_api, + pkgname=pkgname, + index_base_url=index_base_url, + ) case _: raise ValueError(f"Unsupported content type: {content_type}") @@ -276,6 +294,8 @@ async def query_package( elif isinstance(index_urls, str): index_urls = [index_urls] + index_urls = [PYPI_URL if url == "PYPI" else url for url in index_urls] + for url in index_urls: logger.debug("Looping through index urls: %r", url) if _contain_placeholder(url): @@ -297,7 +317,9 @@ async def query_package( 
content_type = headers.get("content-type", "").lower() try: - parser = _select_parser(content_type, name) + base_url = urlunparse(urlparse(url)._replace(path="")) + + parser = _select_parser(content_type, name, index_base_url=base_url) except ValueError as e: raise ValueError(f"Error trying to decode url: {url}") from e return parser(metadata) diff --git a/micropip/transaction.py b/micropip/transaction.py index 9d64a72..94f2fea 100644 --- a/micropip/transaction.py +++ b/micropip/transaction.py @@ -197,8 +197,6 @@ async def _add_requirement_from_package_index(self, req: Requirement): req.name, self.fetch_kwargs, index_urls=self.index_urls ) - logger.debug("Transaction: got metadata %r for requirement %r", metadata, req) - wheel = find_wheel(metadata, req) logger.debug("Transaction: Selected wheel: %r", wheel) @@ -246,6 +244,7 @@ async def add_wheel( logger.info(" Downloading %s", wheel.url.split("/")[-1]) await wheel.download(self.fetch_kwargs) + logger.debug(" Downloaded %s", wheel.url.split("/")[-1]) if self.deps: await self.gather_requirements(wheel.requires(extras)) diff --git a/micropip/wheelinfo.py b/micropip/wheelinfo.py index d9c5a89..d94dd97 100644 --- a/micropip/wheelinfo.py +++ b/micropip/wheelinfo.py @@ -1,6 +1,7 @@ import hashlib import io import json +import logging import zipfile from dataclasses import dataclass from pathlib import Path @@ -20,6 +21,8 @@ from ._utils import parse_wheel_filename from .metadata import Metadata, safe_name, wheel_dist_info_dir +logger = logging.getLogger("micropip") + @dataclass class PackageData: @@ -149,8 +152,14 @@ def requires(self, extras: set[str]) -> list[Requirement]: return requires async def _fetch_bytes(self, fetch_kwargs: dict[str, Any]): + if self.parsed_url.scheme not in ("https", "http"): + # Don't raise ValueError it gets swallowed + raise TypeError(f"Cannot download from a non-remote location: {self.url!r}") try: - return await fetch_bytes(self.url, fetch_kwargs) + logger.debug("Fetching URL %r", 
self.url) + bytes = await fetch_bytes(self.url, fetch_kwargs) + logger.debug("Fetched URL %r", self.url) + return bytes except OSError as e: if self.parsed_url.hostname in [ "files.pythonhosted.org",