Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Over-zealous extension splitting #146

Merged
merged 3 commits into from
Jan 30, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
Unreleased
---

### Changed
- When using `ns.PATH`, only split off a maximum of two suffixes from
a file name (issues #145, #146).

[8.0.2] - 2021-12-14
---

### Fixed
- Bug where sorting paths fail if one of the paths is '.'.
- Bug where sorting paths fail if one of the paths is '.' (issues #142, #143)

[8.0.1] - 2021-12-10
---

### Fixed
- Compose unicode characters when using locale to ensure sorting is correct
across all locales.
across all locales (issues #140, #141)

[8.0.0] - 2021-11-03
---
Expand Down
29 changes: 17 additions & 12 deletions natsort/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,16 +893,21 @@ def path_splitter(
path_parts = []
base = str(s)

# Now, split off the file extensions until we reach a decimal number at
# the beginning of the suffix or there are no more extensions.
suffixes = PurePath(base).suffixes
try:
digit_index = next(i for i, x in enumerate(reversed(suffixes)) if _d_match(x))
except StopIteration:
pass
else:
digit_index = len(suffixes) - digit_index
suffixes = suffixes[digit_index:]

# Now, split off the file extensions until
# - we reach a decimal number at the beginning of the suffix
# - more than two suffixes have been seen
# - a suffix is more than five characters (including leading ".")
# - there are no more extensions
suffixes = []
for i, suffix in enumerate(reversed(PurePath(base).suffixes)):
if _d_match(suffix) or i > 1 or len(suffix) > 5:
break
suffixes.append(suffix)
suffixes.reverse()

# Remove the suffixes from the base component
base = base.replace("".join(suffixes), "")
return filter(None, ichain(path_parts, [base], suffixes))
base_component = [base] if base else []

# Join all path components in an iterator
return filter(None, ichain(path_parts, base_component, suffixes))
15 changes: 15 additions & 0 deletions tests/test_natsorted.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,21 @@ def test_natsorted_handles_numbers_and_filesystem_paths_simultaneously() -> None
assert natsorted(given, alg=ns.PATH) == expected


def test_natsorted_path_extensions_heuristic() -> None:
    """Check ``ns.PATH`` ordering of file names that contain many dots.

    Regression example taken from
    https://github.com/SethMMorton/natsort/issues/145
    """
    ordered = [
        "Try.Me.Bug - 07 - One.Two.5.[text].mkv",
        "Try.Me.Bug - 08 - One.Two.Three[text].mkv",
        "Try.Me.Bug - 09 - One.Two.Three.[text].mkv",
    ]
    # Feed the same names in scrambled order (09, 07, 08).
    scrambled = [ordered[2], ordered[0], ordered[1]]
    assert natsorted(scrambled, alg=ns.PATH) == ordered


@pytest.mark.parametrize(
"alg, expected",
[
Expand Down
25 changes: 21 additions & 4 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import string
from itertools import chain
from operator import neg as op_neg
from typing import List, Pattern, Union
from typing import List, Pattern, Tuple, Union

import pytest
from hypothesis import given
Expand Down Expand Up @@ -155,9 +155,26 @@ def test_path_splitter_splits_path_string_by_sep(x: List[str]) -> None:
assert tuple(utils.path_splitter(z)) == tuple(pathlib.Path(z).parts)


def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example() -> None:
given = "/this/is/a/path/file.x1.10.tar.gz"
expected = (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz")
@pytest.mark.parametrize(
    "given, expected",
    [
        # Two trailing suffixes are split off; ".10" stays in the base name.
        (
            "/this/is/a/path/file.x1.10.tar.gz",
            (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz"),
        ),
        # A single trailing suffix is split off; ".10" stays in the base name.
        (
            "/this/is/a/path/file.x1.10.tar",
            (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar"),
        ),
        # The long ".threethousand" suffix stays in the base name.
        (
            "/this/is/a/path/file.x1.threethousand.tar",
            (os.sep, "this", "is", "a", "path", "file.x1.threethousand", ".tar"),
        ),
    ],
)
def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example(
    given: str, expected: Tuple[str, ...]
) -> None:
    """Check that ``utils.path_splitter`` yields the expected components.

    Each case pairs a path string with the tuple of directory parts, base
    name and split-off extensions that the splitter should produce.
    """
    actual = tuple(utils.path_splitter(given))
    assert actual == tuple(expected)


Expand Down