Skip to content

Commit

Permalink
gh-122905: Sanitize names in zipfile.Path. (#122906)
Browse files Browse the repository at this point in the history
Ported from zipp 3.19.1; ref jaraco/zipp#119.
  • Loading branch information
jaraco authored Aug 11, 2024
1 parent 4534068 commit 9cd0326
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 1 deletion.
17 changes: 17 additions & 0 deletions Lib/test/test_zipfile/_path/test_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -577,3 +577,20 @@ def test_getinfo_missing(self, alpharep):
zipfile.Path(alpharep)
with self.assertRaises(KeyError):
alpharep.getinfo('does-not-exist')

def test_malformed_paths(self):
    """
    Path should handle malformed paths.

    Writes entries whose names start with one or two slashes or with a
    parent-directory component, then checks that iterating the root
    yields the names in relative form without those prefixes.
    """
    data = io.BytesIO()
    zf = zipfile.ZipFile(data, "w")
    zf.writestr("/one-slash.txt", b"content")
    zf.writestr("//two-slash.txt", b"content")
    zf.writestr("../parent.txt", b"content")
    # NOTE(review): presumably blanked so that str() of each child is just
    # its in-archive name rather than being prefixed by the archive's
    # filename -- confirm against zipfile.Path.__str__.
    zf.filename = ''
    root = zipfile.Path(zf)
    # Use the TestCase assertion rather than a bare ``assert``: it is not
    # stripped under ``python -O`` and reports a diff on failure.
    self.assertEqual(
        list(map(str, root.iterdir())),
        [
            'one-slash.txt',
            'two-slash.txt',
            'parent.txt',
        ],
    )
64 changes: 63 additions & 1 deletion Lib/zipfile/_path/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,69 @@ def __setstate__(self, state):
super().__init__(*args, **kwargs)


class CompleteDirs(InitializedState, zipfile.ZipFile):
class SanitizedNames:
    """
    ZipFile mix-in to ensure names are sanitized.

    ``namelist`` delegates to ``super().namelist()``, so this class must
    precede a class providing ``namelist`` in the MRO.
    """

    def namelist(self):
        """Return the parent's name list with every name sanitized.

        Raises ValueError if any name is empty after sanitizing.
        """
        return list(map(self._sanitize, super().namelist()))

    @staticmethod
    def _sanitize(name):
        r"""
        Ensure a relative path with posix separators and no dot names.

        Modeled after
        https://github.com/python/cpython/blob/bcc1be39cb1d04ad9fc0bd1b9193d3972835a57c/Lib/zipfile/__init__.py#L1799-L1813
        but provides consistent cross-platform behavior.

        >>> san = SanitizedNames._sanitize
        >>> san('/foo/bar')
        'foo/bar'
        >>> san('//foo.txt')
        'foo.txt'

        Dot names are dropped, not resolved: a ``..`` component does not
        consume the component before it.

        >>> san('foo/.././bar.txt')
        'foo/bar.txt'
        >>> san('foo../.bar.txt')
        'foo../.bar.txt'
        >>> san('\\foo\\bar.txt')
        'foo/bar.txt'
        >>> san('D:\\foo.txt')
        'D/foo.txt'
        >>> san('\\\\server\\share\\file.txt')
        'server/share/file.txt'
        >>> san('\\\\?\\GLOBALROOT\\Volume3')
        '?/GLOBALROOT/Volume3'
        >>> san('\\\\.\\PhysicalDrive1\\root')
        'PhysicalDrive1/root'

        Retain any trailing slash.

        >>> san('abc/')
        'abc/'

        Raises a ValueError if the result is empty.

        >>> san('../..')
        Traceback (most recent call last):
        ...
        ValueError: Empty filename
        """

        def allowed(part):
            # Reject empty components (from leading or doubled separators)
            # and the dot names.
            return bool(part) and part not in {'..', '.'}

        # Remove the drive letter.
        # Don't use ntpath.splitdrive, because that also strips UNC paths
        bare = re.sub(r'^([A-Z]):', r'\1', name, flags=re.IGNORECASE)
        # Treat backslashes as separators regardless of the host platform.
        clean = bare.replace('\\', '/')
        joined = '/'.join(filter(allowed, clean.split('/')))
        if not joined:
            raise ValueError("Empty filename")
        # Only a trailing forward slash on the original name is retained.
        if name.endswith('/'):
            return joined + '/'
        return joined


class CompleteDirs(InitializedState, SanitizedNames, zipfile.ZipFile):
"""
A ZipFile subclass that ensures that implied directories
are always included in the namelist.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:class:`zipfile.Path` objects now sanitize names from the zipfile.

0 comments on commit 9cd0326

Please sign in to comment.