Skip to content

Commit

Permalink
Merge pull request #11617 from trail-of-forks/ww/restrict-egg-fragement
Browse files Browse the repository at this point in the history
Restrict `#egg=` fragments to valid PEP 508 names
  • Loading branch information
pradyunsg authored Dec 28, 2022
2 parents cecd346 + 64fe223 commit dca39dd
Show file tree
Hide file tree
Showing 5 changed files with 63 additions and 5 deletions.
9 changes: 7 additions & 2 deletions docs/html/topics/vcs-support.md
Original file line number Diff line number Diff line change
Expand Up @@ -139,9 +139,14 @@ option.
pip looks at 2 fragments for VCS URLs:

- `egg`: For specifying the "project name" for use in pip's dependency
resolution logic. eg: `egg=project_name`
resolution logic. e.g.: `egg=project_name`

The `egg` fragment **should** be a bare
[PEP 508](https://peps.python.org/pep-0508/) project name. Anything else
is not guaranteed to work.

- `subdirectory`: For specifying the path to the Python package, when it is not
in the root of the VCS directory. eg: `pkg_dir`
in the root of the VCS directory. e.g.: `pkg_dir`

````{admonition} Example
If your repository layout is:
Expand Down
File renamed without changes.
3 changes: 3 additions & 0 deletions news/11617.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Deprecated a historical ambiguity in how ``egg`` fragments in URL-style
requirements are formatted and handled. ``egg`` fragments that do not look
like PEP 508 names now produce a deprecation warning.
25 changes: 22 additions & 3 deletions src/pip/_internal/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
Union,
)

from pip._internal.utils.deprecation import deprecated
from pip._internal.utils.filetypes import WHEEL_EXTENSION
from pip._internal.utils.hashes import Hashes
from pip._internal.utils.misc import (
Expand Down Expand Up @@ -166,6 +167,7 @@ class Link(KeyBasedCompareMixin):
"dist_info_metadata",
"link_hash",
"cache_link_parsing",
"egg_fragment",
]

def __init__(
Expand Down Expand Up @@ -229,6 +231,7 @@ def __init__(
super().__init__(key=url, defining_class=Link)

self.cache_link_parsing = cache_link_parsing
self.egg_fragment = self._egg_fragment()

@classmethod
def from_json(
Expand Down Expand Up @@ -358,12 +361,28 @@ def url_without_fragment(self) -> str:

_egg_fragment_re = re.compile(r"[#&]egg=([^&]*)")

@property
def egg_fragment(self) -> Optional[str]:
# Per PEP 508.
_project_name_re = re.compile(
r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.IGNORECASE
)

def _egg_fragment(self) -> Optional[str]:
match = self._egg_fragment_re.search(self._url)
if not match:
return None
return match.group(1)

# An egg fragment looks like a PEP 508 project name, along with
# an optional extras specifier. Anything else is invalid.
project_name = match.group(1)
if not self._project_name_re.match(project_name):
deprecated(
reason=f"{self} contains an egg fragment with a non-PEP 508 name",
replacement="to use the req @ url syntax, and remove the egg fragment",
gone_in="25.0",
issue=11617,
)

return project_name

_subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")

Expand Down
31 changes: 31 additions & 0 deletions tests/unit/test_link.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,37 @@ def test_fragments(self) -> None:
assert "eggname" == Link(url).egg_fragment
assert "subdir" == Link(url).subdirectory_fragment

# Extras are supported and preserved in the egg fragment,
# even the empty extras specifier.
# This behavior is deprecated and will change in pip 25.
url = "git+https://example.com/package#egg=eggname[extra]"
assert "eggname[extra]" == Link(url).egg_fragment
assert None is Link(url).subdirectory_fragment
url = "git+https://example.com/package#egg=eggname[extra1,extra2]"
assert "eggname[extra1,extra2]" == Link(url).egg_fragment
assert None is Link(url).subdirectory_fragment
url = "git+https://example.com/package#egg=eggname[]"
assert "eggname[]" == Link(url).egg_fragment
assert None is Link(url).subdirectory_fragment

@pytest.mark.xfail(reason="Behavior change scheduled for 25.0", strict=True)
@pytest.mark.parametrize(
"fragment",
[
# Package names in egg fragments must be in PEP 508 form.
"~invalid~package~name~",
# Version specifiers are not valid in egg fragments.
"eggname==1.2.3",
"eggname>=1.2.3",
# The extras specifier must be in PEP 508 form.
"eggname[!]",
],
)
def test_invalid_egg_fragments(self, fragment: str) -> None:
url = f"git+https://example.com/package#egg={fragment}"
with pytest.raises(Exception):
Link(url)

@pytest.mark.parametrize(
"yanked_reason, expected",
[
Expand Down

3 comments on commit dca39dd

@matteius
Copy link
Member

@matteius matteius commented on dca39dd May 19, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So pipenv definitely uses the extras in the egg fragment today -- is the idea then to use:
Instead of: git+https://github.com/foo/bar.git@some-rev#egg=bar[dev]
This is supposed to be what we support: bar[dev] @ git+https://github.com/foo/bar.git@some-rev
That will be a bit of work for requirementslib that I'll have to take on based on how baked in the egg fragments are to having extras work, but before I take this on I want to understand if I am heading in the right direction. @pradyunsg and @woodruffw ? (sorry for commenting here, the issue was locked)

@matteius
Copy link
Member

@matteius matteius commented on dca39dd May 20, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Followup question would be how to handle subdirectories? Before it was the final thing appended to the uri after the egg fragment (and possible extras):

    if subdirectory:
        uri = "{0}&subdirectory={1}".format(uri, subdirectory)

@sbidoul
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I want to understand if I am heading in the right direction

@matteius bar[dev] @ git+https://github.com/foo/bar.git@some-rev is the PEP 508 syntax, so yes?

Regarding subdirectory fragments, pip extracts them with this regex

_subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")

so they can be anywhere in the fragments list.

Please sign in to comment.