Skip to content

Commit

Permalink
Merge pull request #483 from miurahr/topic/miurahr/security/get-canon…
Browse files Browse the repository at this point in the history
…ical-path

Introduce helpers.canonical_path()
  • Loading branch information
miurahr authored Nov 2, 2022
2 parents 35b32f5 + 1a61ba2 commit 3c3c5c0
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 34 deletions.
90 changes: 61 additions & 29 deletions py7zr/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# p7zr library
#
# Copyright (c) 2019-2021 Hiroshi Miura <miurahr@linux.com>
# Copyright (c) 2019-2022 Hiroshi Miura <miurahr@linux.com>
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
#
# This library is free software; you can redistribute it and/or
Expand All @@ -28,7 +28,7 @@
import time as _time
import zlib
from datetime import datetime, timedelta, timezone, tzinfo
from typing import BinaryIO, Optional, Union
from typing import BinaryIO, List, Optional, Union

import _hashlib # type: ignore # noqa

Expand Down Expand Up @@ -73,7 +73,7 @@ def _calculate_key1(password: bytes, cycles: int, salt: bytes, digest: str) -> b

def _calculate_key2(password: bytes, cycles: int, salt: bytes, digest: str):
"""Calculate 7zip AES encryption key.
It utilize ctypes and memoryview buffer and zero-copy technology on Python."""
It uses ctypes and memoryview buffer and zero-copy technology on Python."""
if digest not in ("sha256"):
raise ValueError("Unknown digest method for password protection.")
assert cycles <= 0x3F
Expand Down Expand Up @@ -435,43 +435,75 @@ def remove_relative_path_marker(path: str) -> str:
return processed_path


def canonical_path(target: pathlib.PurePath) -> pathlib.PurePath:
"""Return a canonical path of target argument."""
stack: List[str] = []
for p in target.parts:
if p != ".." or len(stack) == 0:
stack.append(p)
continue
# treat '..'
if stack[-1] == "..":
stack.append(p) # '../' + '../' -> '../../'
elif stack[-1] == "/":
pass # '/' + '../' -> '/'
else:
stack.pop() # 'foo/boo/' + '..' -> 'foo/'
return pathlib.PurePath(*stack)


def is_relative_to(my: pathlib.PurePath, *other) -> bool:
"""Return True when path is relative to other path, otherwise False."""
try:
my.relative_to(*other)
except ValueError:
return False
return True


def get_sanitized_output_path(fname: str, path: Optional[pathlib.Path]) -> pathlib.Path:
"""
check f.filename has invalid directory traversals
do following but is_relative_to introduced in py 3.9,
so I replaced it with relative_to. when condition is not satisfied, raise ValueError
if not pathlib.Path(...).joinpath(remove_relative_path_marker(outname)).is_relative_to(...):
raise Bad7zFile
When condition is not satisfied, raise Bad7zFile
"""
if path is None:
try:
pathlib.Path(os.getcwd()).joinpath(fname).resolve().relative_to(os.getcwd())
outfile = pathlib.Path(remove_relative_path_marker(fname))
except ValueError:
raise Bad7zFile(f"Specified path is bad: {fname}")
target_path = canonical_path(pathlib.Path.cwd().joinpath(fname))
if is_relative_to(target_path, pathlib.Path.cwd()):
return pathlib.Path(remove_relative_path_marker(fname))
else:
try:
outfile = path.joinpath(remove_relative_path_marker(fname))
outfile.resolve().relative_to(path)
except ValueError:
raise Bad7zFile(f"Specified path is bad: {fname}")
return outfile
outfile = canonical_path(path.joinpath(remove_relative_path_marker(fname)))
if is_relative_to(outfile, path):
return pathlib.Path(outfile)
raise Bad7zFile(f"Specified path is bad: {fname}")


def check_archive_path(arcname: str) -> bool:
path = pathlib.Path("/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/") # dummy path
return is_target_path_valid(path, path.joinpath(arcname))
"""
Check arcname argument is valid for archive.
It should not be absolute, if so it returns False.
It should not be evil traversal attack path.
Otherwise, returns True.
"""
if pathlib.PurePath(arcname).is_absolute():
return False
# test against dummy parent path
if sys.platform == "win32":
path = pathlib.Path("C:/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/")
else:
path = pathlib.Path("/foo/boo/fuga/hoge/a90sufoiasj09/dafj08sajfa/")
return is_path_valid(path.joinpath(arcname), path)


def is_target_path_valid(path: pathlib.Path, target: pathlib.Path) -> bool:
try:
if path.is_absolute():
target.resolve().relative_to(path)
else:
target.resolve().relative_to(pathlib.Path(os.getcwd()).joinpath(path))
except ValueError:
return False
return True
def is_path_valid(target: pathlib.Path, parent: pathlib.Path) -> bool:
"""
Check if target path is valid against parent path.
It returns False when target path has '..' and point out of parent path.
Otherwise, returns True.
"""
if parent.is_absolute():
return is_relative_to(canonical_path(target), parent)
else:
return is_relative_to(canonical_path(target), pathlib.Path.cwd().joinpath(parent))


def check_win32_file_namespace(pathname: pathlib.Path) -> pathlib.Path:
Expand Down
8 changes: 4 additions & 4 deletions py7zr/py7zr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
#
# p7zr library
#
# Copyright (c) 2019-2021 Hiroshi Miura <miurahr@linux.com>
# Copyright (c) 2019-2022 Hiroshi Miura <miurahr@linux.com>
# Copyright (c) 2004-2015 by Joachim Bauch, mail@joachim-bauch.de
# 7-Zip Copyright (C) 1999-2010 Igor Pavlov
# LZMA SDK Copyright (C) 1999-2010 Igor Pavlov
Expand Down Expand Up @@ -53,7 +53,7 @@
check_archive_path,
filetime_to_dt,
get_sanitized_output_path,
is_target_path_valid,
is_path_valid,
readlink,
)
from py7zr.properties import DEFAULT_FILTERS, FILTER_DEFLATE64, MAGIC_7Z, get_default_blocksize, get_memory_limit
Expand Down Expand Up @@ -1327,7 +1327,7 @@ def _extract_single(
with io.BytesIO() as ofp:
self.decompress(fp, f.folder, ofp, f.uncompressed, f.compressed, src_end)
dst: str = ofp.read().decode("utf-8")
if is_target_path_valid(path, fileish.parent.joinpath(dst)):
if is_path_valid(fileish.parent.joinpath(dst), path):
# fileish.unlink(missing_ok=True) > py3.7
if fileish.exists():
fileish.unlink()
Expand All @@ -1341,7 +1341,7 @@ def _extract_single(
omfp.seek(0)
dst = omfp.read().decode("utf-8")
# check sym_target points inside an archive target?
if is_target_path_valid(path, fileish.parent.joinpath(dst)):
if is_path_valid(fileish.parent.joinpath(dst), path):
sym_target = pathlib.Path(dst)
# fileish.unlink(missing_ok=True) > py3.7
if fileish.exists():
Expand Down
35 changes: 34 additions & 1 deletion tests/test_attack.py → tests/test_badfiles.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
# Security protection test cases

import ctypes
import os
import pathlib
import sys
from dataclasses import dataclass
from tempfile import TemporaryDirectory

import pytest

Expand Down Expand Up @@ -50,6 +54,10 @@ def test_extract_path_traversal_attack(tmp_path):
archive.extractall(path=tmp_path)


@pytest.mark.skipif(
sys.platform.startswith("win") and (ctypes.windll.shell32.IsUserAnAdmin() == 0),
reason="Administrator rights is required to make symlink on windows",
)
@pytest.mark.misc
def test_extract_symlink_attack(tmp_path):
my_filters = [
Expand All @@ -71,3 +79,28 @@ def test_extract_symlink_attack(tmp_path):
with pytest.raises(Bad7zFile):
with SevenZipFile(target, "r") as archive:
archive.extractall(path=target_dir)


def test_write_compressed_archive(tmp_path):
@dataclass
class Contents:
filename: str
text: str

contents = (Contents(filename="bin/qmake", text="qqqqq"), Contents(filename="lib/libhoge.so", text="hoge"))
with TemporaryDirectory() as temp_path, SevenZipFile(
tmp_path / "tools_qtcreator-linux-qt.tools.qtcreator.7z", "w"
) as archive:
dest = pathlib.Path(temp_path)
for folder in ("bin", "lib", "mkspecs"):
(dest / folder).mkdir(parents=True, exist_ok=True)
for f in contents:
full_path = dest / f.filename
if not full_path.parent.exists():
full_path.parent.mkdir(parents=True)
full_path.write_text(f.text, "utf_8")
archive.writeall(path=temp_path, arcname="target")
with TemporaryDirectory() as target_path, SevenZipFile(
tmp_path / "tools_qtcreator-linux-qt.tools.qtcreator.7z", "r"
) as archive:
archive.extractall(path=target_path)

0 comments on commit 3c3c5c0

Please sign in to comment.