From e79384cc6fc4c04f89f0ee98159b5ad266fd20ff Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Mon, 21 Aug 2023 12:56:46 +0200 Subject: [PATCH] gh-107845: Fix symlink handling for tarfile.data_filter (GH-107846) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit (cherry picked from commit acbd3f9c5c5f23e95267714e41236140d84fe962) Co-authored-by: Petr Viktorin Co-authored-by: Victor Stinner Co-authored-by: Lumír 'Frenzy' Balhar --- Doc/library/tarfile.rst | 5 + Lib/tarfile.py | 11 +- Lib/test/test_tarfile.py | 146 +++++++++++++++++- ...-08-10-17-36-22.gh-issue-107845.dABiMJ.rst | 3 + 4 files changed, 156 insertions(+), 9 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst diff --git a/Doc/library/tarfile.rst b/Doc/library/tarfile.rst index b7b089a73e6483..61a450cf88b0ac 100644 --- a/Doc/library/tarfile.rst +++ b/Doc/library/tarfile.rst @@ -732,6 +732,11 @@ A ``TarInfo`` object has the following public data attributes: Name of the target file name, which is only present in :class:`TarInfo` objects of type :const:`LNKTYPE` and :const:`SYMTYPE`. + For symbolic links (``SYMTYPE``), the *linkname* is relative to the directory + that contains the link. + For hard links (``LNKTYPE``), the *linkname* is relative to the root of + the archive. + .. attribute:: TarInfo.uid :type: int diff --git a/Lib/tarfile.py b/Lib/tarfile.py index 130b5e0f45dcd5..c6a753b2b98266 100755 --- a/Lib/tarfile.py +++ b/Lib/tarfile.py @@ -741,7 +741,7 @@ def __init__(self, tarinfo): class AbsoluteLinkError(FilterError): def __init__(self, tarinfo): self.tarinfo = tarinfo - super().__init__(f'{tarinfo.name!r} is a symlink to an absolute path') + super().__init__(f'{tarinfo.name!r} is a link to an absolute path') class LinkOutsideDestinationError(FilterError): def __init__(self, tarinfo, path): @@ -801,7 +801,14 @@ def _get_filtered_attrs(member, dest_path, for_data=True): if member.islnk() or member.issym(): if os.path.isabs(member.linkname): raise AbsoluteLinkError(member) - target_path = os.path.realpath(os.path.join(dest_path, member.linkname)) + if member.issym(): + target_path = os.path.join(dest_path, + os.path.dirname(name), + member.linkname) + else: + target_path = os.path.join(dest_path, + member.linkname) + target_path = os.path.realpath(target_path) if os.path.commonpath([target_path, dest_path]) != dest_path: raise LinkOutsideDestinationError(member, target_path) return new_attrs diff --git a/Lib/test/test_tarfile.py b/Lib/test/test_tarfile.py index cdea033ec1244a..3eee186d07c4cc 100644 --- a/Lib/test/test_tarfile.py +++ b/Lib/test/test_tarfile.py @@ -3239,10 +3239,12 @@ def __exit__(self, *exc): self.bio = None def add(self, name, *, type=None, symlink_to=None, hardlink_to=None, - mode=None, **kwargs): + mode=None, size=None, **kwargs): """Add a member to the test archive. Call within `with`.""" name = str(name) tarinfo = tarfile.TarInfo(name).replace(**kwargs) + if size is not None: + tarinfo.size = size if mode: tarinfo.mode = _filemode_to_int(mode) if symlink_to is not None: @@ -3318,7 +3320,8 @@ def check_context(self, tar, filter): raise self.raised_exception self.assertEqual(self.expected_paths, set()) - def expect_file(self, name, type=None, symlink_to=None, mode=None): + def expect_file(self, name, type=None, symlink_to=None, mode=None, + size=None): """Check a single file. See check_context.""" if self.raised_exception: raise self.raised_exception @@ -3347,6 +3350,8 @@ def expect_file(self, name, type=None, symlink_to=None, mode=None): self.assertTrue(path.is_fifo()) else: raise NotImplementedError(type) + if size is not None: + self.assertEqual(path.stat().st_size, size) for parent in path.parents: self.expected_paths.discard(parent) @@ -3393,8 +3398,15 @@ def test_parent_symlink(self): # Test interplaying symlinks # Inspired by 'dirsymlink2a' in jwilk/traversal-archives with ArchiveMaker() as arc: + + # `current` links to `.` which is both: + # - the destination directory + # - `current` itself arc.add('current', symlink_to='.') + + # effectively points to ./../ arc.add('parent', symlink_to='current/..') + arc.add('parent/evil') if os_helper.can_symlink(): @@ -3436,9 +3448,46 @@ def test_parent_symlink(self): def test_parent_symlink2(self): # Test interplaying symlinks # Inspired by 'dirsymlink2b' in jwilk/traversal-archives + + # Posix and Windows have different pathname resolution: + # either symlink or a '..' component resolve first. + # Let's see which we are on. + if os_helper.can_symlink(): + testpath = os.path.join(TEMPDIR, 'resolution_test') + os.mkdir(testpath) + + # testpath/current links to `.` which is all of: + # - `testpath` + # - `testpath/current` + # - `testpath/current/current` + # - etc. + os.symlink('.', os.path.join(testpath, 'current')) + + # we'll test where `testpath/current/../file` ends up + with open(os.path.join(testpath, 'current', '..', 'file'), 'w'): + pass + + if os.path.exists(os.path.join(testpath, 'file')): + # Windows collapses 'current\..' to '.' first, leaving + # 'testpath\file' + dotdot_resolves_early = True + elif os.path.exists(os.path.join(testpath, '..', 'file')): + # Posix resolves 'current' to '.' first, leaving + # 'testpath/../file' + dotdot_resolves_early = False + else: + raise AssertionError('Could not determine link resolution') + with ArchiveMaker() as arc: + + # `current` links to `.` which is both the destination directory + # and `current` itself arc.add('current', symlink_to='.') + + # `current/parent` is also available as `./parent`, + # and effectively points to `./../` arc.add('current/parent', symlink_to='..') + arc.add('parent/evil') with self.check_context(arc.open(), 'fully_trusted'): @@ -3452,6 +3501,7 @@ def test_parent_symlink2(self): with self.check_context(arc.open(), 'tar'): if os_helper.can_symlink(): + # Fail when extracting a file outside destination self.expect_exception( tarfile.OutsideDestinationError, "'parent/evil' would be extracted to " @@ -3462,10 +3512,24 @@ def test_parent_symlink2(self): self.expect_file('parent/evil') with self.check_context(arc.open(), 'data'): - self.expect_exception( - tarfile.LinkOutsideDestinationError, - """'current/parent' would link to ['"].*['"], """ - + "which is outside the destination") + if os_helper.can_symlink(): + if dotdot_resolves_early: + # Fail when extracting a file outside destination + self.expect_exception( + tarfile.OutsideDestinationError, + "'parent/evil' would be extracted to " + + """['"].*evil['"], which is outside """ + + "the destination") + else: + # Fail as soon as we have a symlink outside the destination + self.expect_exception( + tarfile.LinkOutsideDestinationError, + "'current/parent' would link to " + + """['"].*outerdir['"], which is outside """ + + "the destination") + else: + self.expect_file('current/') + self.expect_file('parent/evil') @symlink_test def test_absolute_symlink(self): @@ -3495,12 +3559,30 @@ def test_absolute_symlink(self): with self.check_context(arc.open(), 'data'): self.expect_exception( tarfile.AbsoluteLinkError, - "'parent' is a symlink to an absolute path") + "'parent' is a link to an absolute path") + + def test_absolute_hardlink(self): + # Test hardlink to an absolute path + # Inspired by 'dirsymlink' in https://github.com/jwilk/traversal-archives + with ArchiveMaker() as arc: + arc.add('parent', hardlink_to=self.outerdir / 'foo') + + with self.check_context(arc.open(), 'fully_trusted'): + self.expect_exception(KeyError, ".*foo. not found") + + with self.check_context(arc.open(), 'tar'): + self.expect_exception(KeyError, ".*foo. not found") + + with self.check_context(arc.open(), 'data'): + self.expect_exception( + tarfile.AbsoluteLinkError, + "'parent' is a link to an absolute path") @symlink_test def test_sly_relative0(self): # Inspired by 'relative0' in jwilk/traversal-archives with ArchiveMaker() as arc: + # points to `../../tmp/moo` arc.add('../moo', symlink_to='..//tmp/moo') try: @@ -3551,6 +3633,56 @@ def test_sly_relative2(self): + """['"].*moo['"], which is outside the """ + "destination") + @symlink_test + def test_deep_symlink(self): + # Test that symlinks and hardlinks inside a directory + # point to the correct file (`target` of size 3). + # If links aren't supported we get a copy of the file. + with ArchiveMaker() as arc: + arc.add('targetdir/target', size=3) + # a hardlink's linkname is relative to the archive + arc.add('linkdir/hardlink', hardlink_to=os.path.join( + 'targetdir', 'target')) + # a symlink's linkname is relative to the link's directory + arc.add('linkdir/symlink', symlink_to=os.path.join( + '..', 'targetdir', 'target')) + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + self.expect_file('targetdir/target', size=3) + self.expect_file('linkdir/hardlink', size=3) + if os_helper.can_symlink(): + self.expect_file('linkdir/symlink', size=3, + symlink_to='../targetdir/target') + else: + self.expect_file('linkdir/symlink', size=3) + + @symlink_test + def test_chains(self): + # Test chaining of symlinks/hardlinks. + # Symlinks are created before the files they point to. + with ArchiveMaker() as arc: + arc.add('linkdir/symlink', symlink_to='hardlink') + arc.add('symlink2', symlink_to=os.path.join( + 'linkdir', 'hardlink2')) + arc.add('targetdir/target', size=3) + arc.add('linkdir/hardlink', hardlink_to='targetdir/target') + arc.add('linkdir/hardlink2', hardlink_to='linkdir/symlink') + + for filter in 'tar', 'data', 'fully_trusted': + with self.check_context(arc.open(), filter): + self.expect_file('targetdir/target', size=3) + self.expect_file('linkdir/hardlink', size=3) + self.expect_file('linkdir/hardlink2', size=3) + if os_helper.can_symlink(): + self.expect_file('linkdir/symlink', size=3, + symlink_to='hardlink') + self.expect_file('symlink2', size=3, + symlink_to='linkdir/hardlink2') + else: + self.expect_file('linkdir/symlink', size=3) + self.expect_file('symlink2', size=3) + def test_modes(self): # Test how file modes are extracted # (Note that the modes are ignored on platforms without working chmod) diff --git a/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst new file mode 100644 index 00000000000000..32c1fb93f4ab2c --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-08-10-17-36-22.gh-issue-107845.dABiMJ.rst @@ -0,0 +1,3 @@ +:func:`tarfile.data_filter` now takes the location of symlinks into account +when determining their target, so it will no longer reject some valid +tarballs with ``LinkOutsideDestinationError``.