diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f81376..b1475fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,18 +1,22 @@ Unreleased --- +### Changed +- When using `ns.PATH`, only split off a maximum of two suffixes from + a file name (issues #145, #146). + [8.0.2] - 2021-12-14 --- ### Fixed -- Bug where sorting paths fail if one of the paths is '.'. +- Bug where sorting paths fail if one of the paths is '.' (issues #142, #143) [8.0.1] - 2021-12-10 --- ### Fixed - Compose unicode characters when using locale to ensure sorting is correct - across all locales. + across all locales (issues #140, #141) [8.0.0] - 2021-11-03 --- diff --git a/natsort/utils.py b/natsort/utils.py index 8d56b06..3832318 100644 --- a/natsort/utils.py +++ b/natsort/utils.py @@ -893,16 +893,21 @@ def path_splitter( path_parts = [] base = str(s) - # Now, split off the file extensions until we reach a decimal number at - # the beginning of the suffix or there are no more extensions. - suffixes = PurePath(base).suffixes - try: - digit_index = next(i for i, x in enumerate(reversed(suffixes)) if _d_match(x)) - except StopIteration: - pass - else: - digit_index = len(suffixes) - digit_index - suffixes = suffixes[digit_index:] - + # Now, split off the file extensions until + # - we reach a decimal number at the beginning of the suffix + # - more than two suffixes have been seen + # - a suffix is more than five characters (including leading ".") + # - there are no more extensions + suffixes = [] + for i, suffix in enumerate(reversed(PurePath(base).suffixes)): + if _d_match(suffix) or i > 1 or len(suffix) > 5: + break + suffixes.append(suffix) + suffixes.reverse() + + # Remove the suffixes from the base component base = base.replace("".join(suffixes), "") - return filter(None, ichain(path_parts, [base], suffixes)) + base_component = [base] if base else [] + + # Join all path comonents in an iterator + return filter(None, ichain(path_parts, base_component, suffixes)) diff --git a/tests/test_natsorted.py b/tests/test_natsorted.py index 4a64a27..eb3aefe 100644 --- a/tests/test_natsorted.py +++ b/tests/test_natsorted.py @@ -182,6 +182,21 @@ def test_natsorted_handles_numbers_and_filesystem_paths_simultaneously() -> None assert natsorted(given, alg=ns.PATH) == expected +def test_natsorted_path_extensions_heuristic() -> None: + # https://github.com/SethMMorton/natsort/issues/145 + given = [ + "Try.Me.Bug - 09 - One.Two.Three.[text].mkv", + "Try.Me.Bug - 07 - One.Two.5.[text].mkv", + "Try.Me.Bug - 08 - One.Two.Three[text].mkv", + ] + expected = [ + "Try.Me.Bug - 07 - One.Two.5.[text].mkv", + "Try.Me.Bug - 08 - One.Two.Three[text].mkv", + "Try.Me.Bug - 09 - One.Two.Three.[text].mkv", + ] + assert natsorted(given, alg=ns.PATH) == expected + + @pytest.mark.parametrize( "alg, expected", [ diff --git a/tests/test_utils.py b/tests/test_utils.py index bb229b9..b140682 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -6,7 +6,7 @@ import string from itertools import chain from operator import neg as op_neg -from typing import List, Pattern, Union +from typing import List, Pattern, Tuple, Union import pytest from hypothesis import given @@ -155,9 +155,26 @@ def test_path_splitter_splits_path_string_by_sep(x: List[str]) -> None: assert tuple(utils.path_splitter(z)) == tuple(pathlib.Path(z).parts) -def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example() -> None: - given = "/this/is/a/path/file.x1.10.tar.gz" - expected = (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz") +@pytest.mark.parametrize( + "given, expected", + [ + ( + "/this/is/a/path/file.x1.10.tar.gz", + (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar", ".gz"), + ), + ( + "/this/is/a/path/file.x1.10.tar", + (os.sep, "this", "is", "a", "path", "file.x1.10", ".tar"), + ), + ( + "/this/is/a/path/file.x1.threethousand.tar", + (os.sep, "this", "is", "a", "path", "file.x1.threethousand", ".tar"), + ), + ], +) +def test_path_splitter_splits_path_string_by_sep_and_removes_extension_example( + given: str, expected: Tuple[str, ...] +) -> None: assert tuple(utils.path_splitter(given)) == tuple(expected)