Skip to content

Commit

Permalink
🗓 Nov 5, 2023 5:47:24 PM
Browse files Browse the repository at this point in the history
✨ to/from_lz77
🧪 tests added/updated
  • Loading branch information
securisec committed Nov 5, 2023
1 parent 45d5886 commit f0eed31
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 4 deletions.
4 changes: 1 addition & 3 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,6 @@ New ideas:
☐ ✨ amf encode/decode
☐ ✨ aes cmac
☐ ✨ whitespace encoding https://www.dcode.fr/whitespace-language
☐ 🐙 diff show results only
☐ ✨ brainfuck encoder/decoder
☐ ✨ spoon encoder/decoder

Bug:

Expand Down Expand Up @@ -59,6 +56,7 @@ Misc:
☐ cyberchef recipe to chepy recipe converter

Archive:
✔ 🐙 diff show results only
✔ 🐙 update ascii in xor function to documentation
✔ 🐛 search with bytes is erroring
✔ ✨ base62
Expand Down
44 changes: 43 additions & 1 deletion chepy/modules/dataformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,13 @@
import itertools
from random import randint
from .internal.constants import Encoding
from .internal.helpers import detect_delimiter, Rotate, Uint1Array, UUEncoderDecoder
from .internal.helpers import (
detect_delimiter,
Rotate,
Uint1Array,
UUEncoderDecoder,
LZ77Compressor,
)

yaml = lazy_import.lazy_module("yaml")
import regex as re
Expand Down Expand Up @@ -2052,3 +2058,39 @@ def from_uuencode(self, header: str = "-") -> DataFormatT:
"""
self.state = UUEncoderDecoder(self._convert_to_bytes(), header).uudecode()
return self

@ChepyDecorators.call_stack
def to_lz77(
    self, window_size: int = 13, lookahead_buffer_size: int = 6
) -> DataFormatT:
    """Compress the state with LZ77

    The state is converted to a string and replaced by the token list
    returned from ``LZ77Compressor.compress``.

    Args:
        window_size (int, optional): Window size. Defaults to 13.
        lookahead_buffer_size (int, optional): Lookahead. Defaults to 6.

    Returns:
        Chepy: The Chepy object.
    """
    compressor = LZ77Compressor(window_size, lookahead_buffer_size)
    plaintext = self._convert_to_str()
    self.state = compressor.compress(plaintext)
    return self

@ChepyDecorators.call_stack
def from_lz77(
    self, window_size: int = 13, lookahead_buffer_size: int = 6
) -> DataFormatT:
    """Decompress an LZ77 token list held in the state

    The state must already be a list of tokens (the shape produced by
    ``to_lz77``); it is replaced by the string returned from
    ``LZ77Compressor.decompress``.

    Args:
        window_size (int, optional): Window size. Defaults to 13.
        lookahead_buffer_size (int, optional): Lookahead. Defaults to 6.

    Returns:
        Chepy: The Chepy object.

    Raises:
        AssertionError: If the current state is not a list.
    """
    # Kept as an assert so the exception type seen by existing callers
    # does not change.
    assert isinstance(self.state, list), "State is not a list"
    decompressor = LZ77Compressor(window_size, lookahead_buffer_size)
    self.state = decompressor.decompress(self.state)
    return self
2 changes: 2 additions & 0 deletions chepy/modules/dataformat.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -110,3 +110,5 @@ class DataFormat(ChepyCore):
def from_utf21(self: DataFormatT) -> DataFormatT: ...
def to_uuencode(self: DataFormatT, header: str='-') -> DataFormatT: ...
def from_uuencode(self: DataFormatT, header: str='-') -> DataFormatT: ...
# LZ77-compress the state; implemented in dataformat.py (DataFormat.to_lz77).
def to_lz77(self: DataFormatT, window_size: int = 13, lookahead_buffer_size: int = 6) -> DataFormatT: ...
# Decompress an LZ77 token list held in the state; implemented in dataformat.py (DataFormat.from_lz77).
def from_lz77(self: DataFormatT, window_size: int = 13, lookahead_buffer_size: int = 6) -> DataFormatT: ...
66 changes: 66 additions & 0 deletions chepy/modules/internal/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,72 @@
import binascii


class LZ77Compressor:
    """
    Class containing compress and decompress methods using LZ77 compression algorithm.
    Reference: https://the-algorithms.com/algorithm/lz-77?lang=python

    A token is a three element list ``[offset, length, indicator]``:
    copy ``length`` characters starting ``offset`` characters back in the
    already decoded output, then append the literal ``indicator``.
    """

    def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> None:
        """Store the sizes and derive the search buffer size.

        Args:
            window_size (int, optional): Total window size. Defaults to 13.
            lookahead_buffer_size (int, optional): Lookahead size. Defaults to 6.
        """
        self.window_size = window_size
        self.lookahead_buffer_size = lookahead_buffer_size
        # NOTE(review): the sizes are not validated; a non-positive result
        # here is used as-is by the slicing in compress().
        self.search_buffer_size = self.window_size - self.lookahead_buffer_size

    def compress(self, text: str) -> list:
        """Compress ``text`` into a list of ``[offset, length, indicator]`` tokens.

        Args:
            text (str): Text to compress. An empty string yields ``[]``.

        Returns:
            list: The tokens, in order.
        """
        output = []
        search_buffer = ""

        while text:
            token = self._find_encoding_token(text, search_buffer)
            # A token covers the matched run plus its literal character.
            consumed = token[1] + 1

            search_buffer += text[:consumed]
            if len(search_buffer) > self.search_buffer_size:
                search_buffer = search_buffer[-self.search_buffer_size :]

            text = text[consumed:]
            output.append(token)

        return output

    def decompress(self, tokens: list) -> str:
        """Rebuild the text described by ``tokens``.

        Args:
            tokens (list): Sequence of ``[offset, length, indicator]`` tokens.

        Returns:
            str: The decompressed text.
        """
        # Collect characters in a list and join once instead of growing a
        # string with ``+=`` for every copied character.
        chars = []

        for token in tokens:
            offset, length, indicator = token[0], token[1], token[2]
            for _ in range(length):
                # Copy one character at a time so a match may overlap the
                # characters it has just produced (length > offset).
                chars.append(chars[-offset])
            chars.extend(indicator)

        return "".join(chars)

    def _find_encoding_token(self, text: str, search_buffer: str):
        """Return the token encoding the start of ``text``.

        Scans ``search_buffer`` for the longest match with the start of
        ``text``; on equal lengths the match closest to the end of the
        buffer wins.

        Raises:
            ValueError: If ``text`` is empty.
        """
        if not text:
            raise ValueError("We need some text to work with.")  # pragma: no cover

        length, offset = 0, 0

        if not search_buffer:
            return [offset, length, text[length]]

        for i, character in enumerate(search_buffer):  # pragma: no cover
            if character != text[0]:
                continue
            found_length = self._match_length_from_index(text, search_buffer, 0, i)
            # A zero-length result (only possible when a single character
            # is left) is not a match; keep the plain literal token then.
            if found_length and found_length >= length:
                offset, length = len(search_buffer) - i, found_length

        return [offset, length, text[length]]  # pragma: no cover

    def _match_length_from_index(  # pragma: no cover
        self, text: str, window: str, text_index: int, window_index: int
    ) -> int:
        """Count how many characters of ``text`` match ``window``.

        Comparison starts at ``text[text_index]`` / ``window[window_index]``.
        The window is treated as if every matched character of ``text``
        were appended to it, so a match may run past the end of ``window``
        into the text itself.

        The match is capped at ``len(text) - 1`` so one character is always
        left over to serve as the token's literal; without the cap a match
        reaching the end of ``text`` indexed past the end of the string.
        Implemented as a loop so long matches cannot exhaust the recursion
        limit.
        """
        limit = len(text) - 1
        window_len = len(window)
        start = text_index
        matched = 0

        while text_index < limit:
            if window_index < window_len:
                candidate = window[window_index]
            else:
                # Position inside the virtually extended window.
                candidate = text[start + window_index - window_len]
            if text[text_index] != candidate:
                break
            matched += 1
            text_index += 1
            window_index += 1

        return matched


class UUEncoderDecoder:
def __init__(self, data: bytes, header: str = "-"):
self.data = data
Expand Down
15 changes: 15 additions & 0 deletions tests/test_dataformat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from chepy import Chepy
import re


def test_eval():
Expand Down Expand Up @@ -734,3 +735,17 @@ def test_uuencode():
b"EKO{UUENC0DED_ENCRYPTED?}"
in Chepy(data).to_uuencode().from_uuencode().remove_nullbytes().o
)

def test_lz77():
    """Decode a known LZ77 token stream and spot-check compression."""
    # Tokens are serialized as "(offset,length,char)" groups.
    serialized = "(0,0,O)(0,0,M)(0,0,G)(1,1,G)(3,3, )(0,0,Y)(10,1,U)(4,1,A)(0,0,R)(0,0,E)(4,1,C)(0,0,L)(9,1,S)(6,2,T)(5,1, )(3,1,H)(7,2,F)(13,1,A)(1,1,A)(2,2,G)(36,7,C)(28,5,C)(6,5,W)(3,1,L)(1,1, )(0,0,N)(10,1,W)(40,3,I)(15,1, )(3,3,T)(48,6,G)(5,1,E)(0,0,K)(22,1,{)(25,1,I)(38,1,E)(1,1,E)(3,3,E)(7,7,E)(15,15,_)(38,3,O)(2,2,O)(5,5,O)(11,11,O)(3,3,_)(63,23,})"
    token_pattern = r"\((\d+),(\d+),([A-Z\s_{}]+)\)"
    tokens = [
        [int(offset), int(length), literal]
        for offset, length, literal in re.findall(token_pattern, serialized)
    ]

    decoded = Chepy(tokens).from_lz77().o
    assert b"EKO{" in decoded

    compressed = Chepy("OMGGGGGG").to_lz77(1).o
    assert compressed[1] == [0, 0, "M"]

0 comments on commit f0eed31

Please sign in to comment.