Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add json_metadata property to BaseDistribution #11095

Merged
merged 3 commits into from
Jun 24, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/pip/_internal/metadata/_json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Extracted from https://github.com/pfmoore/pkg_metadata

from email.header import Header, decode_header, make_header
from email.message import Message
from typing import Any, Dict, List, Union

# Core metadata fields recognised by msg_to_json(), in output order.
# Each entry is (header name, multiple-use flag); multiple-use fields may
# appear more than once in a message and are emitted as lists.
METADATA_FIELDS = [
    # Identification
    ("Metadata-Version", False),
    ("Name", False),
    ("Version", False),
    ("Dynamic", True),
    # Platforms
    ("Platform", True),
    ("Supported-Platform", True),
    # Descriptive text
    ("Summary", False),
    ("Description", False),
    ("Description-Content-Type", False),
    ("Keywords", False),
    # Links and people
    ("Home-page", False),
    ("Download-URL", False),
    ("Author", False),
    ("Author-email", False),
    ("Maintainer", False),
    ("Maintainer-email", False),
    ("License", False),
    ("Classifier", True),
    # Requirements and relationships
    ("Requires-Dist", True),
    ("Requires-Python", False),
    ("Requires-External", True),
    ("Project-URL", True),
    ("Provides-Extra", True),
    ("Provides-Dist", True),
    ("Obsoletes-Dist", True),
]


def json_name(field: str) -> str:
    """Return the JSON key for a metadata header name.

    The name is lower-cased and every hyphen becomes an underscore,
    e.g. ``"Requires-Dist"`` -> ``"requires_dist"``.
    """
    underscored = field.replace("-", "_")
    return underscored.lower()


def msg_to_json(msg: Message) -> Dict[str, Any]:
    """Convert a Message object into a JSON-compatible dictionary.

    Each field listed in ``METADATA_FIELDS`` that is present in *msg* is
    stored under its ``json_name`` key: multiple-use fields become a list
    of strings, single-use fields a string. ``Keywords`` is additionally
    split into a list of keywords. A non-empty text payload is stored as
    ``description`` and takes precedence over any ``Description`` header.

    :param msg: Parsed metadata message (e.g. METADATA or PKG-INFO).
    :returns: Dictionary whose values are strings or lists of strings.
    """

    def sanitise_header(h: Union[Header, str]) -> str:
        """Return *h* as text, choosing a charset for unknown-8bit chunks."""
        if isinstance(h, Header):
            chunks = []
            # ``raw`` rather than ``bytes`` to avoid shadowing the builtin.
            for raw, encoding in decode_header(h):
                if encoding == "unknown-8bit":
                    try:
                        # See if UTF-8 works
                        raw.decode("utf-8")
                        encoding = "utf-8"
                    except UnicodeDecodeError:
                        # If not, latin1 at least won't fail
                        encoding = "latin1"
                chunks.append((raw, encoding))
            return str(make_header(chunks))
        return str(h)

    result: Dict[str, Any] = {}
    for field, multi in METADATA_FIELDS:
        if field not in msg:
            continue
        key = json_name(field)
        if multi:
            # The default keeps the comprehension safe (and typed) even
            # though the membership check above already guards presence.
            value: Union[str, List[str]] = [
                sanitise_header(v) for v in msg.get_all(field, [])
            ]
        else:
            value = sanitise_header(msg.get(field))
            if key == "keywords":
                # Accept both comma-separated and space-separated
                # forms, for better compatibility with old data.
                if "," in value:
                    # Drop empty entries left by stray commas so this
                    # branch, like str.split() below, never yields "".
                    stripped = (v.strip() for v in value.split(","))
                    value = [v for v in stripped if v]
                else:
                    value = value.split()
        result[key] = value

    payload = msg.get_payload()
    # get_payload() returns a list of sub-messages for multipart input;
    # only a plain text body can serve as a JSON-compatible description.
    if isinstance(payload, str) and payload:
        result["description"] = payload

    return result
15 changes: 15 additions & 0 deletions src/pip/_internal/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@
from typing import (
IO,
TYPE_CHECKING,
Any,
Collection,
Container,
Dict,
Iterable,
Iterator,
List,
Expand Down Expand Up @@ -38,6 +40,8 @@
from pip._internal.utils.packaging import safe_extra
from pip._internal.utils.urls import url_to_path

from ._json import msg_to_json

if TYPE_CHECKING:
from typing import Protocol
else:
Expand Down Expand Up @@ -379,6 +383,17 @@ def metadata(self) -> email.message.Message:
"""
return self._metadata_cached()

@property
def metadata_dict(self) -> Dict[str, Any]:
    """Metadata (METADATA or PKG-INFO) as a JSON-serializable dictionary.

    The dictionary is the PEP 566 compliant conversion of ``self.metadata``.
    It should be empty if the metadata file is unavailable.

    :raises NoneMetadataError: If the metadata file is available, but does
        not contain valid metadata.
    """
    message = self.metadata
    return msg_to_json(message)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could be cached, too. There's no real reason to recalculate it every time. Although it's probably not a performance bottleneck, so it's not a big deal either way.


@property
def metadata_version(self) -> Optional[str]:
"""Value of "Metadata-Version:" in distribution metadata, if available."""
Expand Down
22 changes: 21 additions & 1 deletion tests/unit/metadata/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,14 @@
import pytest
from pip._vendor.packaging.utils import NormalizedName

from pip._internal.metadata import BaseDistribution, get_directory_distribution
from pip._internal.metadata import (
BaseDistribution,
get_directory_distribution,
get_wheel_distribution,
)
from pip._internal.metadata.base import FilesystemWheel
from pip._internal.models.direct_url import DIRECT_URL_METADATA_NAME, ArchiveInfo
from tests.lib.wheel import make_wheel


@mock.patch.object(BaseDistribution, "read_text", side_effect=FileNotFoundError)
Expand Down Expand Up @@ -82,3 +88,17 @@ class FakeDistribution(BaseDistribution):
mock_read_text.assert_called_once_with(DIRECT_URL_METADATA_NAME)
assert direct_url.url == "https://e.c/p.tgz"
assert isinstance(direct_url.info, ArchiveInfo)


def test_metadata_dict(tmp_path: Path) -> None:
    """Smoke-test BaseDistribution.metadata_dict on a freshly built wheel.

    Only the name and version keys are checked here. More tests are
    available in the original pkg_metadata project where the conversion
    function comes from, and which we may vendor in the future.
    """
    built_wheel = make_wheel(name="pkga", version="1.0.1")
    location = built_wheel.save_to_dir(tmp_path)
    dist = get_wheel_distribution(FilesystemWheel(location), "pkga")

    as_dict = dist.metadata_dict

    assert as_dict["name"] == "pkga"
    assert as_dict["version"] == "1.0.1"