From 686818ed557467c5e94f627bfa71c742c09c87db Mon Sep 17 00:00:00 2001 From: "Jason R. Coombs" Date: Sun, 11 Aug 2024 20:33:33 -0400 Subject: [PATCH] gh-122903: Honor directories in zipfile.Path.glob. (GH-122908) (cherry picked from commit 6aa35f3002dda25858d47e702e750e2871e42a7c) Co-authored-by: Jason R. Coombs --- Lib/test/test_zipfile/_path/test_path.py | 29 +++++++++++++++---- Lib/zipfile/_path/__init__.py | 8 +++-- Lib/zipfile/_path/glob.py | 10 ++++++- ...-08-11-14-23-07.gh-issue-122903.xktZta.rst | 2 ++ 4 files changed, 40 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-08-11-14-23-07.gh-issue-122903.xktZta.rst diff --git a/Lib/test/test_zipfile/_path/test_path.py b/Lib/test/test_zipfile/_path/test_path.py index 4a31caee0b81fa..90d6df0ddcc062 100644 --- a/Lib/test/test_zipfile/_path/test_path.py +++ b/Lib/test/test_zipfile/_path/test_path.py @@ -101,7 +101,7 @@ def zipfile_ondisk(self, alpharep): def test_iterdir_and_types(self, alpharep): root = zipfile.Path(alpharep) assert root.is_dir() - a, k, b, g, j = root.iterdir() + a, n, b, g, j = root.iterdir() assert a.is_file() assert b.is_dir() assert g.is_dir() @@ -121,7 +121,7 @@ def test_is_file_missing(self, alpharep): @pass_alpharep def test_iterdir_on_file(self, alpharep): root = zipfile.Path(alpharep) - a, k, b, g, j = root.iterdir() + a, n, b, g, j = root.iterdir() with self.assertRaises(ValueError): a.iterdir() @@ -136,7 +136,7 @@ def test_subdir_is_dir(self, alpharep): @pass_alpharep def test_open(self, alpharep): root = zipfile.Path(alpharep) - a, k, b, g, j = root.iterdir() + a, n, b, g, j = root.iterdir() with a.open(encoding="utf-8") as strm: data = strm.read() self.assertEqual(data, "content of a") @@ -240,7 +240,7 @@ def test_open_missing_directory(self, alpharep): @pass_alpharep def test_read(self, alpharep): root = zipfile.Path(alpharep) - a, k, b, g, j = root.iterdir() + a, n, b, g, j = root.iterdir() assert a.read_text(encoding="utf-8") == "content of a" # Also check positional encoding arg (gh-101144). assert a.read_text("utf-8") == "content of a" @@ -306,7 +306,7 @@ def test_mutability(self, alpharep): reflect that change. """ root = zipfile.Path(alpharep) - a, k, b, g, j = root.iterdir() + a, n, b, g, j = root.iterdir() alpharep.writestr('foo.txt', 'foo') alpharep.writestr('bar/baz.txt', 'baz') assert any(child.name == 'foo.txt' for child in root.iterdir()) @@ -475,6 +475,18 @@ def test_glob_recursive(self, alpharep): assert list(root.glob("**/*.txt")) == list(root.rglob("*.txt")) + @pass_alpharep + def test_glob_dirs(self, alpharep): + root = zipfile.Path(alpharep) + assert list(root.glob('b')) == [zipfile.Path(alpharep, "b/")] + assert list(root.glob('b*')) == [zipfile.Path(alpharep, "b/")] + + @pass_alpharep + def test_glob_subdir(self, alpharep): + root = zipfile.Path(alpharep) + assert list(root.glob('g/h')) == [zipfile.Path(alpharep, "g/h/")] + assert list(root.glob('g*/h*')) == [zipfile.Path(alpharep, "g/h/")] + @pass_alpharep def test_glob_subdirs(self, alpharep): root = zipfile.Path(alpharep) @@ -594,3 +606,10 @@ def test_malformed_paths(self): 'two-slash.txt', 'parent.txt', ] + + @pass_alpharep + def test_interface(self, alpharep): + from importlib.resources.abc import Traversable + + zf = zipfile.Path(alpharep) + assert isinstance(zf, Traversable) diff --git a/Lib/zipfile/_path/__init__.py b/Lib/zipfile/_path/__init__.py index 0f1814735cb78b..3c01659c613021 100644 --- a/Lib/zipfile/_path/__init__.py +++ b/Lib/zipfile/_path/__init__.py @@ -250,7 +250,10 @@ def _extract_text_encoding(encoding=None, *args, **kwargs): class Path: """ - A pathlib-compatible interface for zip files. + A :class:`importlib.resources.abc.Traversable` interface for zip files. + + Implements many of the features users enjoy from + :class:`pathlib.Path`. Consider a zip file with this structure:: @@ -466,8 +469,7 @@ def glob(self, pattern): prefix = re.escape(self.at) tr = Translator(seps='/') matches = re.compile(prefix + tr.translate(pattern)).fullmatch - names = (data.filename for data in self.root.filelist) - return map(self._next, filter(matches, names)) + return map(self._next, filter(matches, self.root.namelist())) def rglob(self, pattern): return self.glob(f'**/{pattern}') diff --git a/Lib/zipfile/_path/glob.py b/Lib/zipfile/_path/glob.py index 69c41d77c3f654..4320f1c0badcf9 100644 --- a/Lib/zipfile/_path/glob.py +++ b/Lib/zipfile/_path/glob.py @@ -28,7 +28,7 @@ def translate(self, pattern): """ Given a glob pattern, produce a regex that matches it. """ - return self.extend(self.translate_core(pattern)) + return self.extend(self.match_dirs(self.translate_core(pattern))) def extend(self, pattern): r""" @@ -41,6 +41,14 @@ def extend(self, pattern): """ return rf'(?s:{pattern})\Z' + def match_dirs(self, pattern): + """ + Ensure that zipfile.Path directory names are matched. + + zipfile.Path directory names always end in a slash. + """ + return rf'{pattern}[/]?' + def translate_core(self, pattern): r""" Given a glob pattern, produce a regex that matches it. diff --git a/Misc/NEWS.d/next/Library/2024-08-11-14-23-07.gh-issue-122903.xktZta.rst b/Misc/NEWS.d/next/Library/2024-08-11-14-23-07.gh-issue-122903.xktZta.rst new file mode 100644 index 00000000000000..c2a1e64d1f6db1 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-08-11-14-23-07.gh-issue-122903.xktZta.rst @@ -0,0 +1,2 @@ +``zipfile.Path.glob`` now correctly matches directories instead of +silently omitting them.