From d3e7c728e2df1479b7410eb4f7e065a32adde04c Mon Sep 17 00:00:00 2001
From: securisec <securisec@users.noreply.github.com>
Date: Wed, 3 Jul 2024 22:47:56 -0400
Subject: [PATCH] =?UTF-8?q?=F0=9F=97=93=20Jul=203,=202024=2010:46:05?=
 =?UTF-8?q?=E2=80=AFPM?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

🔥 to/from_base64 updated to align to cyberchef
🧪 tests added/updated
🤖 types added/updated
---
 .github/workflows/tests_multi_os.yml |   4 +-
 chepy/modules/dataformat.py          |  81 ++++++++------
 chepy/modules/dataformat.pyi         |   6 +-
 chepy/modules/internal/helpers.py    | 156 +++++++++++++++++++++++++++
 chepy/modules/utils.py               |  39 +------
 chepy/modules/utils.pyi              |   2 +-
 tests/test_core.py                   |  14 ++-
 tests/test_dataformat.py             |  32 +++---
 8 files changed, 240 insertions(+), 94 deletions(-)

diff --git a/.github/workflows/tests_multi_os.yml b/.github/workflows/tests_multi_os.yml
index f7bf3c7..3dd043b 100644
--- a/.github/workflows/tests_multi_os.yml
+++ b/.github/workflows/tests_multi_os.yml
@@ -59,8 +59,10 @@ jobs:
         pip install sphinx recommonmark pytest==8.1.1 pytest-cov==5.0.0 pyperclip 
         
     - name: Test with pytest
+      env:
+        COVERAGE_CORE: sysmon
       run: |
-        COVERAGE_CORE=sysmon pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
+        pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
         coverage report -m
         
     - name: Test plugins osx
diff --git a/chepy/modules/dataformat.py b/chepy/modules/dataformat.py
index 5dd4879..504cc6e 100644
--- a/chepy/modules/dataformat.py
+++ b/chepy/modules/dataformat.py
@@ -15,6 +15,7 @@
 import sqlite3
 import collections
 from random import randint
+import regex as re
 from .internal.constants import Encoding
 from .internal.helpers import (
     detect_delimiter,
@@ -23,6 +24,8 @@
     UUEncoderDecoder,
     Base92,
     Base45,
+    _Base64,
+    expand_alpha_range,
 )
 
 yaml = lazy_import.lazy_module("yaml")
@@ -494,7 +497,7 @@ def from_bytes(self) -> DataFormatT:
         return self
 
     @ChepyDecorators.call_stack
-    def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
+    def to_base64(self, alphabet: str = "standard") -> DataFormatT:
         """Encode as Base64
 
         Base64 is a notation for encoding arbitrary byte data using a
@@ -503,8 +506,7 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
         into an ASCII Base64 string.
 
         Args:
-            custom (str, optional): Provide a custom charset to base64 with
-            url_safe (bool, optional): Encode with url safe charset.
+            alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
 
         Returns:
             Chepy: The Chepy object.
@@ -515,27 +517,23 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
             >>> Chepy("Some data").to_base64(custom=custom).o
             b'IqxhNG/YMLFV'
         """
-        if url_safe:
-            self.state = base64.urlsafe_b64encode(self._convert_to_bytes()).replace(
-                b"=", b""
-            )
-            return self
-        if custom is not None:
-            x = base64.b64encode(self._convert_to_bytes())
-            std_base64chars = (
-                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
-            )
-            self.state = bytes(
-                str(x)[2:-1].translate(str(x)[2:-1].maketrans(std_base64chars, custom)),
-                "utf-8",
+        data = self._convert_to_bytes()
+        alphabet = alphabet.strip()
+
+        char_set = expand_alpha_range(
+            _Base64.base_64_chars.get(alphabet, alphabet), join_by=""
+        )
+        if len(char_set) < 63 or len(char_set) > 66:  # pragma: no cover
+            raise ValueError(
+                "Invalid base64 chars. Should be 63-66 chars. " + str(len(char_set))
             )
-        else:
-            self.state = base64.b64encode(self._convert_to_bytes())
+
+        self.state = _Base64.encode_base64(data, alphabet=char_set)
         return self
 
     @ChepyDecorators.call_stack
     def from_base64(
-        self, custom: str = None, url_safe: bool = False, remove_whitespace: bool = True
+        self, alphabet: str = "standard", remove_non_alpha: bool = True
     ) -> DataFormatT:
         """Decode as Base64
 
@@ -545,9 +543,9 @@ def from_base64(
         into an ASCII Base64 string.
 
         Args:
-            custom (str, optional): Provide a custom charset to base64 with
-            url_safe (bool, optional): If true, decode url safe. Defaults to False
-            remove_whitespace(bool, optional): If true, all whitespaces are removed
+            alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
+            remove_non_alpha(bool, optional): If true, every character that is not part of the
+                selected alphabet (for example whitespace) is removed. (Defaults to True)
 
         Returns:
             Chepy: The Chepy object.
@@ -555,23 +553,36 @@ def from_base64(
         Examples:
             Base64 decode using a custom string
             >>> c = Chepy("QqxhNG/mMKtYPqoz64FVR42=")
-            >>> c.from_base64(custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
+            >>> c.from_base64(alphabet="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
             >>> c.out
             b"some random? data"
         """
-        if remove_whitespace:
-            data = self.remove_whitespace().o
-        data = self._convert_to_str()
-        if custom is not None:
-            std_base64chars = (
-                "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
+        alphabet = alphabet.strip()
+        char_set = expand_alpha_range(
+            _Base64.base_64_chars.get(alphabet, alphabet), join_by=""
+        )
+        if len(char_set) < 63 or len(char_set) > 65:  # pragma: no cover
+            raise ValueError(
+                "Invalid base64 chars. Should be 63-65 chars. " + str(len(char_set))
             )
-            data = data.translate(str.maketrans(custom, std_base64chars))
-        data += "=="
-        if url_safe:
-            self.state = base64.urlsafe_b64decode(data)
-        else:
-            self.state = base64.b64decode(data)
+
+        data = self._convert_to_str()
+
+        if remove_non_alpha:
+            # char_set is literal text, not a pattern: escape it so that a "-"
+            # between two characters (e.g. "9-_" in url_safe) is not parsed as a
+            # character-class range, which would strip "-" from valid input.
+            data = re.sub("[^" + re.escape(char_set) + "]", "", data)
+
+        # Pad to a multiple of four characters. decode_base64 treats "=" as
+        # padding for every alphabet, including those that do not contain it.
+        padding_needed = len(data) % 4
+        if padding_needed and alphabet != "url_safe":
+            data += "=" * (4 - padding_needed)
+
+        # Decode with the alphabet-aware decoder so that standard and custom
+        # alphabets all go through the same code path.
+        self.state = _Base64.decode_base64(data, char_set)
         return self
 
     @ChepyDecorators.call_stack
diff --git a/chepy/modules/dataformat.pyi b/chepy/modules/dataformat.pyi
index f2b2906..aece364 100644
--- a/chepy/modules/dataformat.pyi
+++ b/chepy/modules/dataformat.pyi
@@ -28,8 +28,8 @@ class DataFormat(ChepyCore):
     def to_int(self: DataFormatT) -> DataFormatT: ...
     def to_bytes(self: DataFormatT) -> DataFormatT: ...
     def from_bytes(self: DataFormatT) -> DataFormatT: ...
-    def to_base64(self: DataFormatT, custom: str=...) -> DataFormatT: ...
-    def from_base64(self: DataFormatT, custom: str=..., url_safe: bool=..., remove_whitespace: bool=True) -> DataFormatT: ...
+    def to_base64(self: DataFormatT, alphabet: Union[str, Literal['standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']]='standard') -> DataFormatT: ...
+    def from_base64(self: DataFormatT, alphabet: Union[str, Literal['standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']]='standard', remove_non_alpha: bool=True) -> DataFormatT: ...
     def decode_bytes(self: DataFormatT, errors: Literal['ignore', 'backslashreplace', 'replace']=...) -> DataFormatT: ...
     def to_hex(self: DataFormatT, delimiter: str=..., join_by: str=...) -> DataFormatT: ...
     def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='', replace: Union[bytes, None]=b'%|0x') -> DataFormatT: ...
@@ -71,7 +71,7 @@ class DataFormat(ChepyCore):
     def swap_strings(self: DataFormatT, by:int) -> DataFormatT: ...
     def to_string(self: DataFormatT) -> DataFormatT: ...
     def stringify(self: DataFormatT, compact:bool=...) -> DataFormatT: ...
-    def select(self: DataFormatT, start: int, end: int=None) -> DataFormatT: ...
+    def select(self: DataFormatT, start: int, end: Union[None, int]=None) -> DataFormatT: ...
     def length(self: DataFormatT) -> DataFormatT: ...
     def to_leetcode(self: DataFormatT, replace_space: str=...) -> DataFormatT: ...
     def substitute(self: DataFormatT, x: str=..., y: str=...) -> DataFormatT: ...
diff --git a/chepy/modules/internal/helpers.py b/chepy/modules/internal/helpers.py
index 76f0b04..c4dcb29 100644
--- a/chepy/modules/internal/helpers.py
+++ b/chepy/modules/internal/helpers.py
@@ -1,5 +1,6 @@
 from typing import List, Union
 import binascii
+import regex as re
 
 
 class Base45:
@@ -377,3 +378,158 @@ def rotate_left_carry(self):
         result[-1] |= carryBits
 
         return b"".join([chr(x).encode() for x in result])
+
+
+class _Base64:
+    """Base64 encoder/decoder for standard and custom alphabets.
+
+    base_64_chars maps an alphabet name to its characters in the range
+    notation understood by expand_alpha_range. The static methods take the
+    expanded alphabet, where a character's position is its 6 bit value and a
+    trailing "=" marks the alphabet as padded.
+    """
+
+    base_64_chars = {
+        "standard": "A-Za-z0-9+/=",
+        "url_safe": "A-Za-z0-9-_",
+        "filename_safe": "A-Za-z0-9+\\-=",
+        "itoa64": "./0-9A-Za-z=",
+        "xml": "A-Za-z0-9_.",
+        # "y64": "A-Za-z0-9._-",
+        "z64": "0-9a-zA-Z+/=",
+        "radix_64": "0-9A-Za-z+/=",
+        # "uuencoding": " -_",
+        "xxencoding": "+\\-0-9A-Za-z",
+        # "binHex": "!-,-0-689@A-NP-VX-Z[`a-fh-mp-r",
+        "rot13": "N-ZA-Mn-za-m0-9+/=",
+        "unix_crypt": "./0-9A-Za-z",
+        # "atom128": "/128GhIoPQROSTeUbADfgHijKLM+n0pFWXY456xyzB7=39VaqrstJklmNuZvwcdEC",
+        # "megan35": "3GHIJKLMNOPQRSTUb=cdefghijklmnopWXYZ/12+406789VaqrstuvwxyzABCDEF5",
+        # "zong22": "ZKj9n+yf0wDVX1s/5YbdxSo=ILaUpPBCHg8uvNO4klm6iJGhQ7eFrWczAMEq3RTt2",
+        # "hazz15": "HNO4klm6ij9n+J2hyf0gzA8uvwDEq3X1Q7ZKeFrWcVTts/MRGYbdxSo=ILaUpPBC5",
+    }
+
+    @staticmethod
+    def decode_base64(data, alphabet):
+        """Decode base64 text with the given expanded alphabet.
+
+        Args:
+            data (str): Encoded text; "=" is appended if a group is incomplete.
+            alphabet (str): Expanded alphabet the text was encoded with.
+
+        Returns:
+            bytes: The decoded data.
+
+        Raises:
+            ValueError: If data has a character, other than "=", that is not
+                in alphabet.
+        """
+        # Complete the last group of four so every group decodes the same way.
+        data += "=" * (-len(data) % 4)
+        output = []
+        for pos in range(0, len(data), 4):
+            group = data[pos : pos + 4]
+            # "=" is always treated as padding and contributes zero bits.
+            enc1, enc2, enc3, enc4 = [
+                0 if char == "=" else alphabet.index(char) for char in group
+            ]
+
+            chr1 = (enc1 << 2) | (enc2 >> 4)
+            chr2 = ((enc2 & 15) << 4) | (enc3 >> 2)
+            chr3 = ((enc3 & 3) << 6) | enc4
+
+            # chr2 / chr3 are dropped when their group character is "=" padding.
+            if 0 <= chr1 < 256:
+                output.append(chr1)
+            if 0 <= chr2 < 256 and group[2] != "=":
+                output.append(chr2)
+            if 0 <= chr3 < 256 and group[3] != "=":
+                output.append(chr3)
+
+        return bytes(output)
+
+    @staticmethod
+    def encode_base64(data: bytes, alphabet: str):
+        """Encode bytes as base64 text with the given expanded alphabet.
+
+        Args:
+            data (bytes): Data to encode.
+            alphabet (str): Expanded alphabet. If it ends with "=" the output
+                is padded with "=", otherwise it is left unpadded.
+
+        Returns:
+            str: The encoded text.
+        """
+        # Only alphabets that carry "=" as their last character are padded.
+        padding_char = "=" if alphabet[-1] == "=" else ""
+        output = []
+        for pos in range(0, len(data), 3):
+            group = tuple(data[pos : pos + 3])
+            # Bytes missing from a short final group count as zero.
+            chr1, chr2, chr3 = group + (0,) * (3 - len(group))
+
+            enc1 = chr1 >> 2
+            enc2 = ((chr1 & 3) << 4) | (chr2 >> 4)
+            enc3 = ((chr2 & 15) << 2) | (chr3 >> 6)
+            enc4 = chr3 & 63
+
+            # Do not strip trailing characters afterwards: without "=" the last
+            # alphabet character is an ordinary symbol that data can end with.
+            output.append(alphabet[enc1] + alphabet[enc2])
+            output.append(alphabet[enc3] if len(group) > 1 else padding_char)
+            output.append(alphabet[enc4] if len(group) > 2 else padding_char)
+
+        return "".join(output)
+
+
+def expand_alpha_range(alph_str: str, join_by: Union[str, None] = None):
+    """Expand ranges such as a-z, 0-9 or 10-20 into their members.
+
+    Args:
+        alph_str (str): Text to expand. A backslash before "-" keeps the dash.
+        join_by (str, optional): If not None, join the members with this string.
+
+    Returns:
+        list or str: The members, as one string when join_by is given.
+    """
+    hold = []
+    i = 0
+    length = len(alph_str)
+
+    def digits_end(pos):
+        # Index just past the run of digits that starts at pos.
+        while pos < length and alph_str[pos].isdigit():
+            pos += 1
+        return pos
+
+    while i < length:
+        num_end = digits_end(i)
+        if (
+            i < num_end < length - 1
+            and alph_str[num_end] == "-"
+            and alph_str[num_end + 1].isdigit()
+        ):
+            # Read both numbers whole so that 10-20 expands to 10..20.
+            stop = digits_end(num_end + 1)
+            first, last = int(alph_str[i:num_end]), int(alph_str[num_end + 1 : stop])
+            hold.extend(map(str, range(first, last + 1)))
+            i = stop
+        elif i < num_end:
+            hold.extend(alph_str[i:num_end])  # lone digits are taken one by one
+            i = num_end
+        elif (
+            i < length - 2
+            and alph_str[i].isalpha()
+            and alph_str[i + 1] == "-"
+            and alph_str[i + 2].isalpha()
+        ):
+            hold.extend(map(chr, range(ord(alph_str[i]), ord(alph_str[i + 2]) + 1)))
+            i += 3
+        elif i < length - 1 and alph_str[i] == "\\" and alph_str[i + 1] == "-":
+            hold.append("-")
+            i += 2
+        else:
+            hold.append(alph_str[i])
+            i += 1
+
+    return hold if join_by is None else join_by.join(hold)
diff --git a/chepy/modules/utils.py b/chepy/modules/utils.py
index ed881b3..1d98344 100644
--- a/chepy/modules/utils.py
+++ b/chepy/modules/utils.py
@@ -3,6 +3,7 @@
 import difflib
 from collections import OrderedDict
 from typing import TypeVar, Union, Any
+from .internal.helpers import expand_alpha_range as _ex_al_range
 
 import chepy.modules.internal.colors as _int_colors
 
@@ -831,41 +832,5 @@ def expand_alpha_range(self, join_by: Union[str, None] = None):
             Chepy: The Chepy object.
         """
         alph_str = self._convert_to_str()
-        hold = []
-
-        def expand_range(start, end):
-            return [str(x) for x in range(int(start), int(end) + 1)]
-
-        def expand_char_range(start, end):
-            return [chr(x) for x in range(ord(start), ord(end) + 1)]
-
-        hold = []
-        i = 0
-        length = len(alph_str)
-
-        while i < length:
-            # Match numerical ranges like 10-20
-            num_match = re.match(r"(\d+)-(\d+)", alph_str[i:])
-            if num_match:
-                start, end = num_match.groups()
-                hold.extend(expand_range(start, end))
-                i += len(start) + len(end) + 1  # move past the number range
-            elif i < length - 2 and alph_str[i + 1] == "-" and alph_str[i] != "\\":
-                # Handle character ranges like a-z
-                start = alph_str[i]
-                end = alph_str[i + 2]
-                hold.extend(expand_char_range(start, end))
-                i += 2
-            elif (
-                i < length - 2 and alph_str[i] == "\\" and alph_str[i + 1] == "-"
-            ):  # pragma: no cover
-                hold.append("-")
-                i += 1
-            else:
-                hold.append(alph_str[i])
-            i += 1
-
-        if join_by is not None:
-            hold = join_by.join(hold)
-        self.state = hold
+        self.state = _ex_al_range(alph_str=alph_str, join_by=join_by)
         return self
diff --git a/chepy/modules/utils.pyi b/chepy/modules/utils.pyi
index c1ab35d..b55ecae 100644
--- a/chepy/modules/utils.pyi
+++ b/chepy/modules/utils.pyi
@@ -41,4 +41,4 @@ class Utils(ChepyCore):
     def drop_bytes(self: UtilsT, start: int, length: int) -> UtilsT: ...
     def without(self: UtilsT, *values: Any) -> UtilsT: ...
     def pick(self: UtilsT, *values: Any) -> UtilsT: ...
-    def expand_alpha_range(self, join_by: Union[str, None]=None) -> UtilsT: ...
+    def expand_alpha_range(self: UtilsT, join_by: Union[str, None]=None) -> UtilsT: ...
diff --git a/tests/test_core.py b/tests/test_core.py
index 4ea5c81..ea3dfc1 100644
--- a/tests/test_core.py
+++ b/tests/test_core.py
@@ -185,7 +185,7 @@ def test_run_recipe():
             recipes=[
                 {
                     "function": "from_base64",
-                    "args": {"custom": None},
+                    "args": {"alphabet": 'standard'},
                 },
                 {"function": "swap_case", "args": {}},
             ]
@@ -197,9 +197,11 @@ def test_run_recipe():
 
 def test_recipe():
     temp = str(Path(tempfile.gettempdir()) / os.urandom(24).hex())
-    Chepy("tests/files/encoding").load_file().reverse().rot_13().from_base64(
+    Chepy(
+        "tests/files/encoding"
+    ).load_file().reverse().rot_13().from_base64().from_base32(
         remove_whitespace=False
-    ).from_base32(remove_whitespace=False).str_from_hexdump().save_recipe(temp)
+    ).str_from_hexdump().save_recipe(temp)
 
     assert (
         Chepy("tests/files/encoding").load_recipe(temp).o
@@ -350,5 +352,7 @@ def test_dump_json():
         b"byte_key": b"\x00\x01",
     }
 
-    assert Chepy(data).dump_json().json_to_dict().get_by_key('byte_key').o == b'\x00\x01'
-    assert Chepy(True).dump_json().o == b'true'
\ No newline at end of file
+    assert (
+        Chepy(data).dump_json().json_to_dict().get_by_key("byte_key").o == b"\x00\x01"
+    )
+    assert Chepy(True).dump_json().o == b"true"
diff --git a/tests/test_dataformat.py b/tests/test_dataformat.py
index ba9e629..1c76725 100644
--- a/tests/test_dataformat.py
+++ b/tests/test_dataformat.py
@@ -117,28 +117,28 @@ def test_base32():
 
 def test_to_base64():
     assert Chepy("some data").to_base64().out.decode() == "c29tZSBkYXRh"
+    assert Chepy("test").to_base64(alphabet="url_safe").o == b"dGVzdA"
     assert (
-        Chepy("some random? data")
-        .to_base64(
-            custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz="
-        )
-        .o
+        Chepy("some random? data").to_base64(alphabet="itoa64").o
         == b"QqxhNG/mMKtYPqoz64FVR42="
     )
-    assert Chepy("test").to_base64(url_safe=True).o == b"dGVzdA"
+    data = "some random data"
+    assert Chepy(data).to_base64("rot13").o == b"p29gMFOlLJ5xo20tMTS0LD=="
+    assert Chepy(data).to_base64("radix_64").o == b"SszjPI1oOMvaRsqWP65qOG=="
+    assert Chepy(data).to_base64("xml").o == b"c29tZSByYW5kb20gZGF0YQ"
+    assert Chepy(data).to_base64("xxencoding").o == b"QqxhNG-mMKtYPqoUN43oME"
+    assert Chepy(data).to_base64("filename_safe").o == b"c29tZSByYW5kb20gZGF0YQ=="
+    assert Chepy(data).to_base64("z64").o == b"sSZJpi1OomVArSQwp65Qog=="
+    assert Chepy(data).to_base64("unix_crypt").o == b"QqxhNG/mMKtYPqoUN43oME"
 
 
 def test_from_base64():
     assert Chepy("c29tZSByYW5kb20/IGRhdGE").from_base64().o == b"some random? data"
     assert (
-        Chepy("QqxhNG/mMKtYPqoz64FVR42=")
-        .from_base64(
-            custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz="
-        )
-        .o
+        Chepy("QqxhNG/mMKtYPqoz64FVR42=").from_base64(alphabet="itoa64").o
         == b"some random? data"
     )
-    assert Chepy("dGVzdA").from_base64(url_safe=True).o == b"test"
+    assert Chepy("dGVzdA").from_base64(alphabet="url_safe").o == b"test"
     assert (
         Chepy("""YW1hdGV1cnNDVEZ7cGljMF93NDVuN19nMDBkX24wdWdoXzUwX2lfNzAwa19zb20zX2NyMzR0MXYz
 X2wxYjNydDEzNV9hZGU4ODIwZX0=""")
@@ -146,6 +146,14 @@ def test_from_base64():
         .o
         == b"amateursCTF{pic0_w45n7_g00d_n0ugh_50_i_700k_som3_cr34t1v3_l1b3rt135_ade8820e}"
     )
+    assert Chepy("q6lIr6YwtSZOr6g=").from_base64("z64").o == b"hello world"
+    assert Chepy("nTIfoT8tq29loTD=").from_base64("rot13").o == b"hello world"
+    data = b"some random data"
+    assert Chepy("c29tZSByYW5kb20gZGF0YQ").from_base64("xml").o == data
+    assert Chepy("QqxhNG-mMKtYPqoUN43oME").from_base64("xxencoding").o == data
+    assert Chepy("c29tZSByYW5kb20gZGF0YQ==").from_base64("filename_safe").o == data
+    assert Chepy("sSZJpi1OomVArSQwp65Qog==").from_base64("z64").o == data
+    assert Chepy("QqxhNG/mMKtYPqoUN43oME").from_base64("unix_crypt").o == data
 
 
 def test_decode_bytes():