From 1a61ba2073796d1f7ad8bfd9c48d5bd269df6d71 Mon Sep 17 00:00:00 2001 From: Hiroshi Miura Date: Wed, 2 Nov 2022 08:14:24 +0900 Subject: [PATCH] Introduce helpers.canonical_path() - Handle '../../' with canonical_path() that handle PurePath object - Add helpers.is_relative_to(a, b) - Remove resolve() from path checkers - Add test cases from aqtinstall project - Fix is_valid_path check on Windows - Rename a bad path test file as test_badfiles.py Signed-off-by: Hiroshi Miura --- py7zr/helpers.py | 90 +++++++++++++++------- py7zr/py7zr.py | 8 +- tests/{test_attack.py => test_badfiles.py} | 35 ++++++++- 3 files changed, 99 insertions(+), 34 deletions(-) rename tests/{test_attack.py => test_badfiles.py} (64%) diff --git a/py7zr/helpers.py b/py7zr/helpers.py index 4e4e2542..fd92894b 100644 --- a/py7zr/helpers.py +++ b/py7zr/helpers.py @@ -2,7 +2,7 @@ # # p7zr library # -# Copyright (c) 2019-2021 Hiroshi Miura +# Copyright (c) 2019-2022 Hiroshi Miura # Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de # # This library is free software; you can redistribute it and/or @@ -28,7 +28,7 @@ import time as _time import zlib from datetime import datetime, timedelta, timezone, tzinfo -from typing import BinaryIO, Optional, Union +from typing import BinaryIO, List, Optional, Union import _hashlib # type: ignore # noqa @@ -73,7 +73,7 @@ def _calculate_key1(password: bytes, cycles: int, salt: bytes, digest: str) -> b def _calculate_key2(password: bytes, cycles: int, salt: bytes, digest: str): """Calculate 7zip AES encryption key. - It utilize ctypes and memoryview buffer and zero-copy technology on Python.""" + It uses ctypes and memoryview buffer and zero-copy technology on Python.""" if digest not in ("sha256"): raise ValueError("Unknown digest method for password protection.") assert cycles <= 0x3F @@ -435,43 +435,75 @@ def remove_relative_path_marker(path: str) -> str: return processed_path +def canonical_path(target: pathlib.PurePath) -> pathlib.PurePath: + """Return a canonical path of target argument.""" + stack: List[str] = [] + for p in target.parts: + if p != ".." or len(stack) == 0: + stack.append(p) + continue + # treat '..' + if stack[-1] == "..": + stack.append(p) # '../' + '../' -> '../../' + elif stack[-1] == "/": + pass # '/' + '../' -> '/' + else: + stack.pop() # 'foo/boo/' + '..' -> 'foo/' + return pathlib.PurePath(*stack) + + +def is_relative_to(my: pathlib.PurePath, *other) -> bool: + """Return True when path is relative to other path, otherwise False.""" + try: + my.relative_to(*other) + except ValueError: + return False + return True + + def get_sanitized_output_path(fname: str, path: Optional[pathlib.Path]) -> pathlib.Path: """ check f.filename has invalid directory traversals - do following but is_relative_to introduced in py 3.9, - so I replaced it with relative_to. when condition is not satisfied, raise ValueError - if not pathlib.Path(...).joinpath(remove_relative_path_marker(outname)).is_relative_to(...): - raise Bad7zFile + When condition is not satisfied, raise Bad7zFile """ if path is None: - try: - pathlib.Path(os.getcwd()).joinpath(fname).resolve().relative_to(os.getcwd()) - outfile = pathlib.Path(remove_relative_path_marker(fname)) - except ValueError: - raise Bad7zFile(f"Specified path is bad: {fname}") + target_path = canonical_path(pathlib.Path.cwd().joinpath(fname)) + if is_relative_to(target_path, pathlib.Path.cwd()): + return pathlib.Path(remove_relative_path_marker(fname)) else: - try: - outfile = path.joinpath(remove_relative_path_marker(fname)) - outfile.resolve().relative_to(path) - except ValueError: - raise Bad7zFile(f"Specified path is bad: {fname}") - return outfile + outfile = canonical_path(path.joinpath(remove_relative_path_marker(fname))) + if is_relative_to(outfile, path): + return pathlib.Path(outfile) + raise Bad7zFile(f"Specified path is bad: {fname}") def check_archive_path(arcname: str) -> bool: - path = pathlib.Path("/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/") # dummy path - return is_target_path_valid(path, path.joinpath(arcname)) + """ + Check arcname argument is valid for archive. + It should not be absolute, if so it returns False. + It should not be evil traversal attack path. + Otherwise, returns True. + """ + if pathlib.PurePath(arcname).is_absolute(): + return False + # test against dummy parent path + if sys.platform == "win32": + path = pathlib.Path("C:/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/") + else: + path = pathlib.Path("/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/") + return is_path_valid(path.joinpath(arcname), path) -def is_target_path_valid(path: pathlib.Path, target: pathlib.Path) -> bool: - try: - if path.is_absolute(): - target.resolve().relative_to(path) - else: - target.resolve().relative_to(pathlib.Path(os.getcwd()).joinpath(path)) - except ValueError: - return False - return True +def is_path_valid(target: pathlib.Path, parent: pathlib.Path) -> bool: + """ + Check if target path is valid against parent path. + It returns False when target path has '..' and point out of parent path. + Otherwise, returns True. + """ + if parent.is_absolute(): + return is_relative_to(canonical_path(target), parent) + else: + return is_relative_to(canonical_path(target), pathlib.Path.cwd().joinpath(parent)) def check_win32_file_namespace(pathname: pathlib.Path) -> pathlib.Path: diff --git a/py7zr/py7zr.py b/py7zr/py7zr.py index 0d6e929f..d014b1f0 100644 --- a/py7zr/py7zr.py +++ b/py7zr/py7zr.py @@ -2,7 +2,7 @@ # # p7zr library # -# Copyright (c) 2019-2021 Hiroshi Miura +# Copyright (c) 2019-2022 Hiroshi Miura # Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de # 7-Zip Copyright (C) 1999-2010 Igor Pavlov # LZMA SDK Copyright (C) 1999-2010 Igor Pavlov @@ -53,7 +53,7 @@ check_archive_path, filetime_to_dt, get_sanitized_output_path, - is_target_path_valid, + is_path_valid, readlink, ) from py7zr.properties import DEFAULT_FILTERS, FILTER_DEFLATE64, MAGIC_7Z, get_default_blocksize, get_memory_limit @@ -1327,7 +1327,7 @@ def _extract_single( with io.BytesIO() as ofp: self.decompress(fp, f.folder, ofp, f.uncompressed, f.compressed, src_end) dst: str = ofp.read().decode("utf-8") - if is_target_path_valid(path, fileish.parent.joinpath(dst)): + if is_path_valid(fileish.parent.joinpath(dst), path): # fileish.unlink(missing_ok=True) > py3.7 if fileish.exists(): fileish.unlink() @@ -1341,7 +1341,7 @@ def _extract_single( omfp.seek(0) dst = omfp.read().decode("utf-8") # check sym_target points inside an archive target? - if is_target_path_valid(path, fileish.parent.joinpath(dst)): + if is_path_valid(fileish.parent.joinpath(dst), path): sym_target = pathlib.Path(dst) # fileish.unlink(missing_ok=True) > py3.7 if fileish.exists(): diff --git a/tests/test_attack.py b/tests/test_badfiles.py similarity index 64% rename from tests/test_attack.py rename to tests/test_badfiles.py index 87d505bd..32e1e1ed 100644 --- a/tests/test_attack.py +++ b/tests/test_badfiles.py @@ -1,6 +1,10 @@ # Security protection test cases - +import ctypes import os +import pathlib +import sys +from dataclasses import dataclass +from tempfile import TemporaryDirectory import pytest @@ -50,6 +54,10 @@ def test_extract_path_traversal_attack(tmp_path): archive.extractall(path=tmp_path) +@pytest.mark.skipif( + sys.platform.startswith("win") and (ctypes.windll.shell32.IsUserAnAdmin() == 0), + reason="Administrator rights is required to make symlink on windows", +) @pytest.mark.misc def test_extract_symlink_attack(tmp_path): my_filters = [ @@ -71,3 +79,28 @@ def test_extract_symlink_attack(tmp_path): with pytest.raises(Bad7zFile): with SevenZipFile(target, "r") as archive: archive.extractall(path=target_dir) + + +def test_write_compressed_archive(tmp_path): + @dataclass + class Contents: + filename: str + text: str + + contents = (Contents(filename="bin/qmake", text="qqqqq"), Contents(filename="lib/libhoge.so", text="hoge")) + with TemporaryDirectory() as temp_path, SevenZipFile( + tmp_path / "tools_qtcreator-linux-qt.tools.qtcreator.7z", "w" + ) as archive: + dest = pathlib.Path(temp_path) + for folder in ("bin", "lib", "mkspecs"): + (dest / folder).mkdir(parents=True, exist_ok=True) + for f in contents: + full_path = dest / f.filename + if not full_path.parent.exists(): + full_path.parent.mkdir(parents=True) + full_path.write_text(f.text, "utf_8") + archive.writeall(path=temp_path, arcname="target") + with TemporaryDirectory() as target_path, SevenZipFile( + tmp_path / "tools_qtcreator-linux-qt.tools.qtcreator.7z", "r" + ) as archive: + archive.extractall(path=target_path)