🗓 Nov 5, 2023 5:47:24 PM

✨ to/from_lz77 🧪 tests added/updated
securisec · Nov 5, 2023 · f0eed31 · f0eed31
1 parent 45d5886
commit f0eed31
Show file tree

Hide file tree

Showing 5 changed files with 127 additions and 4 deletions.
diff --git a/TODO b/TODO
@@ -26,9 +26,6 @@ New ideas:
   ☐ ✨ amf encode/decode
   ☐ ✨ aes cmac
   ☐ ✨ whitespace encoding https://www.dcode.fr/whitespace-language
-  ☐ 🐙 diff show results only
-  ☐ ✨ brainfuck encoder/decoder
-  ☐ ✨ spoon encoder/decoder
 
 Bug:
 
@@ -59,6 +56,7 @@ Misc:
   ☐ cyberchef recipe to chepy recipe converter
 
 Archive:
+  ✔ 🐙 diff show results only
   ✔ 🐙 update ascii in xor function to documentation
   ✔ 🐛 search with bytes is erroring
   ✔ ✨ base62

diff --git a/chepy/modules/dataformat.py b/chepy/modules/dataformat.py
@@ -11,7 +11,13 @@
 import itertools
 from random import randint
 from .internal.constants import Encoding
-from .internal.helpers import detect_delimiter, Rotate, Uint1Array, UUEncoderDecoder
+from .internal.helpers import (
+    detect_delimiter,
+    Rotate,
+    Uint1Array,
+    UUEncoderDecoder,
+    LZ77Compressor,
+)
 
 yaml = lazy_import.lazy_module("yaml")
 import regex as re
@@ -2052,3 +2058,39 @@ def from_uuencode(self, header: str = "-") -> DataFormatT:
         """
         self.state = UUEncoderDecoder(self._convert_to_bytes(), header).uudecode()
         return self
+
+    @ChepyDecorators.call_stack
+    def to_lz77(
+        self, window_size: int = 13, lookahead_buffer_size: int = 6
+    ) -> DataFormatT:
+        """Compress the state into a list of LZ77 tokens
+
+        The state is converted to a string and passed to
+        ``LZ77Compressor.compress``; whatever that returns becomes the
+        new state.
+
+        Args:
+            window_size (int, optional): Window size. Defaults to 13.
+            lookahead_buffer_size (int, optional): Lookahead. Defaults to 6.
+
+        Returns:
+            Chepy: The Chepy object.
+        """
+        compressor = LZ77Compressor(window_size, lookahead_buffer_size)
+        text = self._convert_to_str()
+        self.state = compressor.compress(text)
+        return self
+
+    @ChepyDecorators.call_stack
+    def from_lz77(
+        self, window_size: int = 13, lookahead_buffer_size: int = 6
+    ) -> DataFormatT:
+        """Decompress a list of LZ77 tokens
+
+        The state must already be a list; it is passed unchanged to
+        ``LZ77Compressor.decompress`` and the result becomes the new state.
+
+        Args:
+            window_size (int, optional): Window size. Defaults to 13.
+            lookahead_buffer_size (int, optional): Lookahead. Defaults to 6.
+
+        Returns:
+            Chepy: The Chepy object.
+
+        Raises:
+            AssertionError: If the state is not a list.
+        """
+        assert isinstance(self.state, list), "State is not a list"
+        compressor = LZ77Compressor(window_size, lookahead_buffer_size)
+        self.state = compressor.decompress(self.state)
+        return self
diff --git a/chepy/modules/dataformat.pyi b/chepy/modules/dataformat.pyi
@@ -110,3 +110,5 @@ class DataFormat(ChepyCore):
     def from_utf21(self: DataFormatT) -> DataFormatT: ...
     def to_uuencode(self: DataFormatT, header: str='-') -> DataFormatT: ...
     def from_uuencode(self: DataFormatT, header: str='-') -> DataFormatT: ...
+    def to_lz77(self: DataFormatT, window_size: int = 13, lookahead_buffer_size: int = 6) -> DataFormatT: ...
+    def from_lz77(self: DataFormatT, window_size: int = 13, lookahead_buffer_size: int = 6) -> DataFormatT: ...
diff --git a/chepy/modules/internal/helpers.py b/chepy/modules/internal/helpers.py
@@ -2,6 +2,72 @@
 import binascii
 
 
+class LZ77Compressor:
+    """
+    Class containing compress and decompress methods using LZ77 compression algorithm.
+
+    A token is a ``[offset, length, next_char]`` list: copy ``length``
+    characters starting ``offset`` characters back from the end of the text
+    produced so far, then append the literal ``next_char``.
+    Reference: https://the-algorithms.com/algorithm/lz-77?lang=python
+    """
+
+    def __init__(self, window_size: int = 13, lookahead_buffer_size: int = 6) -> None:
+        """Store the window sizes; the search buffer is what remains of the
+        window once the lookahead buffer is subtracted."""
+        self.window_size = window_size
+        self.lookahead_buffer_size = lookahead_buffer_size
+        self.search_buffer_size = self.window_size - self.lookahead_buffer_size
+
+    def compress(self, text: str) -> list:
+        """Encode ``text`` as a list of ``[offset, length, next_char]`` tokens.
+
+        Args:
+            text (str): Text to compress. An empty string yields ``[]``.
+
+        Returns:
+            list: The tokens, in the order ``decompress`` must replay them.
+        """
+        output = []
+        search_buffer = ""
+        # A window that is not larger than the lookahead leaves no history.
+        history_size = max(self.search_buffer_size, 0)
+
+        while text:
+            token = self._find_encoding_token(text, search_buffer)
+            consumed = token[1] + 1  # matched characters plus the literal
+
+            search_buffer += text[:consumed]
+            # Trim from an explicit start index: ``buffer[-0:]`` would keep
+            # the whole buffer instead of dropping it.
+            search_buffer = search_buffer[
+                max(len(search_buffer) - history_size, 0) :
+            ]
+
+            text = text[consumed:]
+            output.append(token)
+
+        return output
+
+    def decompress(self, tokens: list) -> str:
+        """Rebuild the text described by ``tokens``.
+
+        Args:
+            tokens (list): Sequence of ``[offset, length, next_char]`` items.
+
+        Returns:
+            str: The decompressed text.
+        """
+        output = ""
+
+        for token in tokens:
+            offset, length, next_char = token[0], token[1], token[2]
+            # Copy one character at a time so a match may overlap the text
+            # it is itself producing (offset smaller than length).
+            for _ in range(length):
+                output += output[-offset]
+            output += next_char
+
+        return output
+
+    def _find_encoding_token(self, text: str, search_buffer: str):
+        """Return the ``[offset, length, next_char]`` token for the start of
+        ``text`` given the already-emitted ``search_buffer``.
+
+        Raises:
+            ValueError: If ``text`` is empty.
+        """
+        if not text:
+            raise ValueError("We need some text to work with.")  # pragma: no cover
+
+        length, offset = 0, 0
+        # The literal of the token is read from ``text``, so a match may
+        # cover at most everything except the final character.
+        max_length = len(text) - 1
+
+        for i, character in enumerate(search_buffer):  # pragma: no cover
+            if character != text[0]:
+                continue
+            found_length = min(
+                self._match_length_from_index(text, search_buffer, 0, i),
+                max_length,
+            )
+            # ``>=`` keeps the match nearest the end of the buffer on ties.
+            # Zero-length matches are ignored so literal-only tokens keep
+            # offset 0.
+            if found_length and found_length >= length:
+                offset, length = len(search_buffer) - i, found_length
+
+        return [offset, length, text[length]]
+
+    def _match_length_from_index(  # pragma: no cover
+        self, text: str, window: str, text_index: int, window_index: int
+    ) -> int:
+        """Count how many characters of ``text`` (from ``text_index``) match
+        ``window`` (from ``window_index``).
+
+        The window is treated as if each matched character were appended to
+        it, so a match may run past the end of ``window``. Counting stops at
+        the first mismatch or when ``text`` is exhausted.
+        """
+        window_length = len(window)
+        matched = 0
+
+        while text_index + matched < len(text):
+            position = window_index + matched
+            if position < window_length:
+                reference = window[position]
+            else:
+                # Past the buffer the match overlaps itself: the reference
+                # is a character of ``text`` that was matched earlier.
+                reference = text[text_index + position - window_length]
+            if text[text_index + matched] != reference:
+                break
+            matched += 1
+
+        return matched
+
+
 class UUEncoderDecoder:
     def __init__(self, data: bytes, header: str = "-"):
         self.data = data

diff --git a/tests/test_dataformat.py b/tests/test_dataformat.py
@@ -1,4 +1,5 @@
 from chepy import Chepy
+import re
 
 
 def test_eval():
@@ -734,3 +735,17 @@ def test_uuencode():
         b"EKO{UUENC0DED_ENCRYPTED?}"
         in Chepy(data).to_uuencode().from_uuencode().remove_nullbytes().o
     )
+
+def test_lz77():
+    """Decode a known LZ77 token stream and spot-check one encoder token."""
+    input_str = "(0,0,O)(0,0,M)(0,0,G)(1,1,G)(3,3, )(0,0,Y)(10,1,U)(4,1,A)(0,0,R)(0,0,E)(4,1,C)(0,0,L)(9,1,S)(6,2,T)(5,1, )(3,1,H)(7,2,F)(13,1,A)(1,1,A)(2,2,G)(36,7,C)(28,5,C)(6,5,W)(3,1,L)(1,1, )(0,0,N)(10,1,W)(40,3,I)(15,1, )(3,3,T)(48,6,G)(5,1,E)(0,0,K)(22,1,{)(25,1,I)(38,1,E)(1,1,E)(3,3,E)(7,7,E)(15,15,_)(38,3,O)(2,2,O)(5,5,O)(11,11,O)(3,3,_)(63,23,})"
+    token_pattern = r"\((\d+),(\d+),([A-Z\s_{}]+)\)"
+    # Each "(a,b,c)" group becomes one [int, int, str] token.
+    array_of_arrays = [
+        [int(first), int(second), third]
+        for first, second, third in re.findall(token_pattern, input_str)
+    ]
+
+    assert b'EKO{' in Chepy(array_of_arrays).from_lz77().o
+
+    assert Chepy('OMGGGGGG').to_lz77(1).o[1] == [0, 0, 'M']