Skip to content

Commit

Permalink
Add path_safe method (#1150)
Browse files Browse the repository at this point in the history
Co-authored-by: Sam Bull <git@sambull.org>
  • Loading branch information
bdraco and Dreamsorcerer authored Sep 23, 2024
1 parent 497752c commit e7c47b1
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGES/1150.feature.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Added :attr:`~yarl.URL.path_safe` to be able to fetch the path without ``%2F`` and ``%25`` decoded -- by :user:`bdraco`.
17 changes: 17 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,23 @@ There are two kinds of properties: *decoded* and *encoded* (with
>>> URL('http://example.com').path
'/'

.. warning::

In many situations it is important to distinguish between path separators
(a literal ``/``) and other forward slashes (a literal ``%2F``). Use
:attr:`URL.path_safe` for these cases.

.. attribute:: URL.path_safe

Similar to :attr:`URL.path` except it doesn't decode ``%2F`` or ``%25``.
This makes it possible to distinguish between path separators (``/``) and
encoded slashes (``%2F``).

Note that ``%25`` is also not decoded to avoid issues with double unquoting
of values, e.g. you can unquote the value with
``URL.path_safe.replace("%2F", "/").replace("%25", "%")`` to get the same
result as :attr:`URL.path`. If ``%25`` were unquoted, it would be
impossible to tell the difference between ``%2F`` and ``%252F``.

.. attribute:: URL.path_qs

Expand Down
38 changes: 37 additions & 1 deletion tests/test_url.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from enum import Enum
from urllib.parse import SplitResult
from urllib.parse import SplitResult, quote, unquote

import pytest

Expand Down Expand Up @@ -352,6 +352,42 @@ def test_path_with_2F():
assert url.path == "/foo/bar/baz"


def test_path_safe_with_2F():
    """An encoded slash (%2F) must stay encoded in path_safe.

    Decoding it would make it indistinguishable from a real path separator.
    """
    expected = "/foo/bar%2Fbaz"
    # The input uses lowercase hex digits; the expected value has them
    # uppercased.
    assert URL("http://example.com/foo/bar%2fbaz").path_safe == expected


def test_path_safe_with_25():
    """An encoded percent sign (%25) must stay encoded in path_safe.

    Decoding it would leave the value open to double unquoting.
    """
    safe_path = URL("http://example.com/foo/bar%252Fbaz").path_safe
    assert safe_path == "/foo/bar%252Fbaz"

    # Apply the documented manual decode: slashes first, then percent signs,
    # so that "%252F" becomes "%2F" rather than "/".
    manually_decoded = safe_path.replace("%2F", "/").replace("%25", "%")
    assert manually_decoded == "/foo/bar%2Fbaz"


@pytest.mark.parametrize(
    "original_path",
    [
        "m+@bar/baz",
        "m%2B@bar/baz",
        "m%252B@bar/baz",
        "m%2F@bar/baz",
    ],
)
def test_path_safe_only_round_trips(original_path: str) -> None:
    """A doubly quoted path round-trips through path_safe.

    The path is percent-encoded twice, placed in a URL, and recovered by
    applying the documented manual decode to ``path_safe`` followed by a
    regular ``unquote``.
    """
    # safe="" makes quote() encode "/" as well, so every layer is explicit.
    single_quoted = quote(original_path, safe="")
    double_quoted = quote(single_quoted, safe="")

    safe_path = URL(f"http://example.com/{double_quoted}").path_safe
    one_layer_removed = safe_path.replace("%2F", "/").replace("%25", "%")

    assert one_layer_removed == f"/{single_quoted}"
    assert unquote(one_layer_removed) == f"/{original_path}"


def test_raw_path_for_empty_url():
url = URL()
assert "" == url.raw_path
Expand Down
12 changes: 12 additions & 0 deletions yarl/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,7 @@ class URL:

_UNQUOTER = _Unquoter()
_PATH_UNQUOTER = _Unquoter(unsafe="+")
_PATH_SAFE_UNQUOTER = _Unquoter(ignore="/%", unsafe="+")
_QS_UNQUOTER = _Unquoter(qs=True)

_val: SplitResult
Expand Down Expand Up @@ -710,6 +711,17 @@ def path(self) -> str:
"""
return self._PATH_UNQUOTER(self.raw_path)

@cached_property
def path_safe(self) -> str:
    """Decoded path of URL, leaving ``%2F`` and ``%25`` encoded.

    / for absolute URLs without path part.
    Encoded slashes (%2F) and encoded percent signs (%25) are kept as-is,
    so they can be told apart from literal path separators.
    """
    unquote_path = self._PATH_SAFE_UNQUOTER
    return unquote_path(self.raw_path)

@cached_property
def _parsed_query(self) -> List[Tuple[str, str]]:
"""Parse query part of URL."""
Expand Down

0 comments on commit e7c47b1

Please sign in to comment.