Skip to content

Commit

Permalink
Fix archive bug (fixes #41) (#42)
Browse files Browse the repository at this point in the history
* Refactoring test suites
* Add compression tests
* Fix archive logics
- Update write_uint64()
- Use write_uint64() for size field
* fix writing of attributes

Signed-off-by: Hiroshi Miura <miurahr@linux.com>
  • Loading branch information
miurahr authored Dec 19, 2019
1 parent 26bea07 commit e8c611e
Show file tree
Hide file tree
Showing 8 changed files with 112 additions and 84 deletions.
72 changes: 39 additions & 33 deletions py7zr/archiveinfo.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,27 @@ def write_uint64(file: BinaryIO, value: int):
| 11111110 BYTE y[7] : y
| 11111111 BYTE y[8] : y
"""
mask = 0x80
length = (value.bit_length() + 7) // 8 or 1
ba = bytearray(value.to_bytes(length, 'little'))
for _ in range(length - 1):
mask |= mask >> 1
if ba[0] >= 2 ** (8 - length):
file.write(pack('B', mask))
elif length > 1:
ba[0] |= mask
if value < 0x80:
file.write(pack('B', value))
return
if value > 0x01ffffffffffffff:
file.write(b'\xff')
file.write(value.to_bytes(8, 'little'))
return
byte_length = (value.bit_length() + 7) // 8
ba = bytearray(value.to_bytes(byte_length, 'little'))
high_byte = int(ba[-1])
if high_byte < 2 << (8 - byte_length - 1):
for x in range(byte_length - 1):
high_byte |= 0x80 >> x
file.write(pack('B', high_byte))
file.write(ba[:byte_length - 1])
else:
pass
file.write(ba)
mask = 0x80
for x in range(byte_length):
mask |= 0x80 >> x
file.write(pack('B', mask))
file.write(ba)


def read_boolean(file: BinaryIO, count: int, checkall: bool = False) -> List[bool]:
Expand Down Expand Up @@ -481,18 +490,18 @@ def _retrieve_coders_info(self, file: BinaryIO):
folder.crc = crcs[idx]
pid = file.read(1)
if pid != Property.END:
raise Bad7zFile('end id expected but %s found' % repr(pid))
raise Bad7zFile('end id expected but %s found at %d' % (repr(pid), file.tell()))

def write(self, file: BinaryIO):
assert self.numfolders is not None
assert self.folders is not None
assert self.numfolders == len(self.folders)
file.write(Property.UNPACK_INFO)
file.write(Property.FOLDER)
write_uint64(file, self.numfolders)
write_byte(file, b'\x00')
for i in range(self.numfolders):
for folder in self.folders:
folder.write(file)
for folder in self.folders:
folder.write(file)
# If support external entity, we may write
# self.datastreamidx here.
# folder data will be written in another place.
Expand Down Expand Up @@ -696,8 +705,6 @@ def _read(self, fp: BinaryIO):
isempty = read_boolean(buffer, numfiles, checkall=False)
list(map(lambda x, y: x.update({'emptystream': y}), self.files, isempty)) # type: ignore
numemptystreams += isempty.count(True)
self.emptyfiles = [False] * numemptystreams
self.antifiles = [False] * numemptystreams
elif prop == Property.EMPTY_FILE:
self.emptyfiles = read_boolean(buffer, numemptystreams, checkall=False)
elif prop == Property.ANTI:
Expand Down Expand Up @@ -768,10 +775,10 @@ def _write_times(self, fp: BinaryIO, propid, name: str) -> None:
if f[name] is not None:
defined.append(True)
num_defined += 1
if num_defined == len(defined):
write_byte(fp, (num_defined * 8 + 2).to_bytes(1, byteorder='little'))
else:
write_byte(fp, (num_defined * 8 + bits_to_bytes(num_defined) + 2).to_bytes(1, byteorder='little'))
size = num_defined * 8 + 2
if not reduce(and_, defined):
size += bits_to_bytes(num_defined)
write_uint64(fp, size)
write_boolean(fp, defined, all_defined=True)
write_byte(fp, b'\x00')
for i, file in enumerate(self.files):
Expand Down Expand Up @@ -811,18 +818,17 @@ def _write_attributes(self, file):
defined = [] # type: List[bool]
num_defined = 0
for f in self.files:
if 'attributes' in f.keys():
if f['attributes'] is not None:
defined.append(True)
num_defined += 1
continue
defined.append(False)
if 'attributes' in f.keys() and f['attributes'] is not None:
defined.append(True)
num_defined += 1
else:
defined.append(False)
size = num_defined * 4 + 2
if num_defined != len(defined):
size += bits_to_bytes(num_defined)
write_byte(file, Property.ATTRIBUTES)
if num_defined == len(defined):
write_byte(file, (num_defined * 4 + 2).to_bytes(1, byteorder='little'))
else:
write_byte(file, (num_defined * 4 + bits_to_bytes(num_defined) + 2).to_bytes(1, byteorder='little'))
write_boolean(file, defined, all_defined=False)
write_uint64(file, size)
write_boolean(file, defined, all_defined=True)
write_byte(file, b'\x00')
for i, f in enumerate(self.files):
if defined[i]:
Expand All @@ -838,7 +844,7 @@ def write(self, file: BinaryIO):
emptystreams.append(f['emptystream'])
if self._are_there(emptystreams):
write_byte(file, Property.EMPTY_STREAM)
write_byte(file, bits_to_bytes(numfiles).to_bytes(1, 'little'))
write_uint64(file, bits_to_bytes(numfiles))
write_boolean(file, emptystreams, all_defined=False)
else:
if self._are_there(self.emptyfiles):
Expand Down
1 change: 0 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
texttable
pytest
pytest-benchmark[histogram]
pytest-cov
pytest-timeout
coverage
Expand Down
8 changes: 0 additions & 8 deletions tests/bench_compare.sh

This file was deleted.

50 changes: 42 additions & 8 deletions tests/test_compress.py → tests/test_archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def test_simple_compress_and_decompress():

@pytest.mark.basic
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_single_encoded_header(capsys, tmp_path):
def test_compress_single_encoded_header(capsys, tmp_path):
target = tmp_path.joinpath('target.7z')
archive = py7zr.SevenZipFile(target, 'w')
archive.set_encoded_header_mode(True)
Expand Down Expand Up @@ -73,7 +73,7 @@ def test_py7zr_compress_single_encoded_header(capsys, tmp_path):

@pytest.mark.basic
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_directory_encoded_header(tmp_path):
def test_compress_directory_encoded_header(tmp_path):
target = tmp_path.joinpath('target.7z')
archive = py7zr.SevenZipFile(target, 'w')
archive.set_encoded_header_mode(True)
Expand All @@ -99,7 +99,7 @@ def test_py7zr_compress_directory_encoded_header(tmp_path):

@pytest.mark.file
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_files_encoded_header(tmp_path):
def test_compress_files_encoded_header(tmp_path):
tmp_path.joinpath('src').mkdir()
tmp_path.joinpath('tgt').mkdir()
py7zr.unpack_7zarchive(os.path.join(testdata_path, 'test_1.7z'), path=tmp_path.joinpath('src'))
Expand Down Expand Up @@ -155,7 +155,7 @@ def test_py7zr_compress_files_encoded_header(tmp_path):

@pytest.mark.basic
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_single(capsys, tmp_path):
def test_compress_file_0(capsys, tmp_path):
target = tmp_path.joinpath('target.7z')
archive = py7zr.SevenZipFile(target, 'w')
archive.set_encoded_header_mode(False)
Expand Down Expand Up @@ -183,7 +183,7 @@ def test_py7zr_compress_single(capsys, tmp_path):

@pytest.mark.basic
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_directory(tmp_path):
def test_compress_directory(tmp_path):
target = tmp_path.joinpath('target.7z')
archive = py7zr.SevenZipFile(target, 'w')
archive.set_encoded_header_mode(False)
Expand All @@ -209,7 +209,7 @@ def test_py7zr_compress_directory(tmp_path):

@pytest.mark.file
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_files(tmp_path):
def test_compress_files_1(tmp_path):
tmp_path.joinpath('src').mkdir()
tmp_path.joinpath('tgt').mkdir()
py7zr.unpack_7zarchive(os.path.join(testdata_path, 'test_1.7z'), path=tmp_path.joinpath('src'))
Expand Down Expand Up @@ -286,7 +286,7 @@ def test_register_archive_format(tmp_path):

@pytest.mark.api
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_with_simple_filter(tmp_path):
def test_compress_with_simple_filter(tmp_path):
my_filters = [{"id": lzma.FILTER_LZMA2, "preset": lzma.PRESET_DEFAULT}, ]
target = tmp_path.joinpath('target.7z')
archive = py7zr.SevenZipFile(target, 'w', filters=my_filters)
Expand All @@ -296,7 +296,7 @@ def test_py7zr_compress_with_simple_filter(tmp_path):

@pytest.mark.api
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_py7zr_compress_with_custom_filter(tmp_path):
def test_compress_with_custom_filter(tmp_path):
my_filters = [
{"id": lzma.FILTER_DELTA, "dist": 5},
{"id": lzma.FILTER_LZMA2, "preset": 7 | lzma.PRESET_EXTREME},
Expand All @@ -305,3 +305,37 @@ def test_py7zr_compress_with_custom_filter(tmp_path):
archive = py7zr.SevenZipFile(target, 'w', filters=my_filters)
archive.writeall(os.path.join(testdata_path, "src"), "src")
archive.close()


@pytest.mark.file
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_compress_files_2(tmp_path):
    """Round-trip the contents of ``test_2.7z`` through the archive writer.

    The fixture archive is unpacked into ``tmp_path/src``, re-archived into
    ``tmp_path/target.7z`` with ``set_encoded_header_mode(False)``, and the
    result is extracted into ``tmp_path/tgt``.  The test passes when none of
    these steps raises.
    """
    src_dir = tmp_path.joinpath('src')
    tgt_dir = tmp_path.joinpath('tgt')
    src_dir.mkdir()
    tgt_dir.mkdir()
    py7zr.unpack_7zarchive(os.path.join(testdata_path, 'test_2.7z'), path=src_dir)
    target = tmp_path.joinpath('target.7z')
    # writeall('.') is relative to the working directory, so we have to chdir
    # into the source tree; remember where we were so that the change does not
    # leak into every test that runs after this one.
    previous_cwd = os.getcwd()
    os.chdir(src_dir)
    try:
        archive = py7zr.SevenZipFile(target, 'w')
        archive.set_encoded_header_mode(False)
        archive.writeall('.')
        # NOTE(review): close() is kept inside the chdir window in case the
        # writer resolves the relative member paths lazily -- confirm.
        archive.close()
    finally:
        os.chdir(previous_cwd)
    reader = py7zr.SevenZipFile(target, 'r')
    try:
        reader.extractall(path=tgt_dir)
    finally:
        # Release the archive handle even when extraction fails.
        reader.close()


@pytest.mark.file
@pytest.mark.skipif(sys.version_info < (3, 6), reason="requires python3.6 or higher")
def test_compress_files_3(tmp_path):
    """Round-trip the contents of ``test_3.7z`` through the archive writer.

    The fixture archive is unpacked into ``tmp_path/src``, re-archived into
    ``tmp_path/target.7z`` with ``set_encoded_header_mode(False)``, and the
    result is extracted into ``tmp_path/tgt``.  The test passes when none of
    these steps raises.
    """
    src_dir = tmp_path.joinpath('src')
    tgt_dir = tmp_path.joinpath('tgt')
    src_dir.mkdir()
    tgt_dir.mkdir()
    py7zr.unpack_7zarchive(os.path.join(testdata_path, 'test_3.7z'), path=src_dir)
    target = tmp_path.joinpath('target.7z')
    # writeall('.') is relative to the working directory, so we have to chdir
    # into the source tree; remember where we were so that the change does not
    # leak into every test that runs after this one.
    previous_cwd = os.getcwd()
    os.chdir(src_dir)
    try:
        archive = py7zr.SevenZipFile(target, 'w')
        archive.set_encoded_header_mode(False)
        archive.writeall('.')
        # NOTE(review): close() is kept inside the chdir window in case the
        # writer resolves the relative member paths lazily -- confirm.
        archive.close()
    finally:
        os.chdir(previous_cwd)
    reader = py7zr.SevenZipFile(target, 'r')
    try:
        reader.extractall(path=tgt_dir)
    finally:
        # Release the archive handle even when extraction fails.
        reader.close()
21 changes: 0 additions & 21 deletions tests/test_benchmark.py

This file was deleted.

File renamed without changes.
33 changes: 29 additions & 4 deletions tests/test_basic_unit.py → tests/test_unit.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,8 +262,9 @@ def test_write_booleans(booleans, all_defined, expected):
@pytest.mark.unit
@pytest.mark.parametrize("testinput, expected",
[(1, b'\x01'), (127, b'\x7f'), (128, b'\x80\x80'), (65535, b'\xc0\xff\xff'),
(441, b'\xc0\xb9\x01'),
(0xffff7f, b'\xe0\x7f\xff\xff'), (0xffffffff, b'\xf0\xff\xff\xff\xff'),
(441, b'\x81\xb9'),
(0xffff7f, b'\xe0\x7f\xff\xff'), (0x0e002100, b'\xee\x00\x21\x00'),
(0xffffffff, b'\xf0\xff\xff\xff\xff'),
(0x7f1234567f, b'\xf8\x7f\x56\x34\x12\x7f'),
(0x1234567890abcd, b'\xfe\xcd\xab\x90\x78\x56\x34\x12'),
(0xcf1234567890abcd, b'\xff\xcd\xab\x90\x78\x56\x34\x12\xcf')])
Expand All @@ -276,9 +277,10 @@ def test_write_uint64(testinput, expected):

@pytest.mark.unit
@pytest.mark.parametrize("testinput, expected",
[(b'\x01', 1), (b'\x7f', 127), (b'\x80\x80', 128), (b'\xc0\xb9\x01', 441),
[(b'\x01', 1), (b'\x7f', 127), (b'\x80\x80', 128), (b'\x81\xb9', 441),
(b'\xc0\xff\xff', 65535),
(b'\xe0\x7f\xff\xff', 0xffff7f), (b'\xf0\xff\xff\xff\xff', 0xffffffff),
(b'\xe0\x7f\xff\xff', 0xffff7f), (b'\xee\x00\x21\x00', 0x0e002100),
(b'\xf0\xff\xff\xff\xff', 0xffffffff),
(b'\xf8\x7f\x56\x34\x12\x7f', 0x7f1234567f),
(b'\xfe\xcd\xab\x90\x78\x56\x34\x12', 0x1234567890abcd),
(b'\xff\xcd\xab\x90\x78\x56\x34\x12\xcf', 0xcf1234567890abcd)])
Expand Down Expand Up @@ -486,3 +488,26 @@ def test_make_file_info2():
assert file_info.get('emptystream')
flag = stat.FILE_ATTRIBUTE_DIRECTORY
assert file_info.get('attributes') & flag == flag


@pytest.mark.unit
def test_simple_compress_and_decompress():
    """Compress three chunks and check both decompression paths restore them.

    The stream produced by ``SevenZipCompressor`` is decoded once with a raw
    ``lzma.LZMADecompressor`` built from the compressor's filters, and once
    with ``SevenZipDecompressor`` built from its coders, size and CRC.
    """
    chunks = (b"Some data\n", b"Another piece of data\n", b"Even more data\n")
    expected = b'Some data\nAnother piece of data\nEven more data\n'

    sevenzip_compressor = py7zr.compression.SevenZipCompressor()
    encoder = sevenzip_compressor.compressor
    # Feed the chunks in order, then flush whatever the encoder still buffers.
    pieces = [encoder.compress(chunk) for chunk in chunks]
    pieces.append(encoder.flush())
    compressed = b"".join(pieces)
    compressed_size = len(compressed)

    # Path 1: plain lzma in raw mode, using the filter chain of the compressor.
    raw_decoder = lzma.LZMADecompressor(format=lzma.FORMAT_RAW,
                                        filters=sevenzip_compressor.filters)
    assert raw_decoder.decompress(compressed) == expected

    # Path 2: the py7zr decompressor, driven by coders plus size and CRC.
    coders = sevenzip_compressor.coders
    checksum = py7zr.helpers.calculate_crc32(compressed)
    sevenzip_decoder = py7zr.compression.SevenZipDecompressor(coders, compressed_size, checksum)
    assert sevenzip_decoder.decompress(compressed) == expected
11 changes: 2 additions & 9 deletions tox.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[tox]
envlist = clean, mypy, check, pypy3, py{35,36,37,38}, bench, report, docs
envlist = clean, mypy, check, pypy3, py{35,36,37,38}, report, docs

[testenv]
passenv = TRAVIS TRAVIS_* APPVEYOR APPVEYOR_*
Expand All @@ -8,7 +8,6 @@ commands =
python -m pytest -vv
depends =
pypy3,py{35,36,37,38}: clean, check
bench: py37

[flake8]
ignore = F841
Expand All @@ -21,7 +20,7 @@ basepython =
pypy3: pypy3
python_files = test*.py
norecursedirs = .git _build tmp* .eggs
addopts = --benchmark-autosave --cov-report=term-missing:skip-covered --cov-append --cov=py7zr
addopts = --cov-report=term-missing:skip-covered --cov-append --cov=py7zr
timeout = 300
markers =
asyncio: mark a test as an asyncio test.
Expand All @@ -30,7 +29,6 @@ markers =
api: mark a test as an interface test.
unit: mark a test as a unit test.
cli: mark a test as a cli test.
perf: mark a test as a performance test.

[testenv:mypy]
basepython = python3.7
Expand Down Expand Up @@ -73,11 +71,6 @@ deps = coverage
skip_install = true
commands = coverage erase

[testenv:bench]
basepython = python3.7
deps = pytest-benchmark
commands = pytest-benchmark compare

[testenv:coveralls]
deps =
coveralls
Expand Down

0 comments on commit e8c611e

Please sign in to comment.