From d3e7c728e2df1479b7410eb4f7e065a32adde04c Mon Sep 17 00:00:00 2001 From: securisec Date: Wed, 3 Jul 2024 22:47:56 -0400 Subject: [PATCH] =?UTF-8?q?=F0=9F=97=93=20Jul=203,=202024=2010:46:05?= =?UTF-8?q?=E2=80=AFPM?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 🔥 to/from_base64 updated to align to cyberchef 🧪 tests added/updated 🤖 types added/updated --- .github/workflows/tests_multi_os.yml | 4 +- chepy/modules/dataformat.py | 81 ++++++++------ chepy/modules/dataformat.pyi | 6 +- chepy/modules/internal/helpers.py | 156 +++++++++++++++++++++++++++ chepy/modules/utils.py | 39 +------ chepy/modules/utils.pyi | 2 +- tests/test_core.py | 14 ++- tests/test_dataformat.py | 32 +++--- 8 files changed, 240 insertions(+), 94 deletions(-) diff --git a/.github/workflows/tests_multi_os.yml b/.github/workflows/tests_multi_os.yml index f7bf3c7..3dd043b 100644 --- a/.github/workflows/tests_multi_os.yml +++ b/.github/workflows/tests_multi_os.yml @@ -59,8 +59,10 @@ jobs: pip install sphinx recommonmark pytest==8.1.1 pytest-cov==5.0.0 pyperclip - name: Test with pytest + env: + COVERAGE_CORE: sysmon run: | - COVERAGE_CORE=sysmon pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/ + pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/ coverage report -m - name: Test plugins osx diff --git a/chepy/modules/dataformat.py b/chepy/modules/dataformat.py index 5dd4879..504cc6e 100644 --- a/chepy/modules/dataformat.py +++ b/chepy/modules/dataformat.py @@ -15,6 +15,7 @@ import sqlite3 import collections from random import randint +import regex as re from .internal.constants import Encoding from .internal.helpers import ( detect_delimiter, @@ -23,6 +24,8 @@ UUEncoderDecoder, Base92, Base45, + _Base64, + expand_alpha_range, ) yaml = lazy_import.lazy_module("yaml") @@ -494,7 +497,7 @@ def from_bytes(self) -> DataFormatT: return self 
@ChepyDecorators.call_stack - def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT: + def to_base64(self, alphabet: str = "standard") -> DataFormatT: """Encode as Base64 Base64 is a notation for encoding arbitrary byte data using a @@ -503,8 +506,7 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT: into an ASCII Base64 string. Args: - custom (str, optional): Provide a custom charset to base64 with - url_safe (bool, optional): Encode with url safe charset. + alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64 Returns: Chepy: The Chepy object. @@ -515,27 +517,23 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT: >>> Chepy("Some data").to_base64(custom=custom).o b'IqxhNG/YMLFV' """ - if url_safe: - self.state = base64.urlsafe_b64encode(self._convert_to_bytes()).replace( - b"=", b"" - ) - return self - if custom is not None: - x = base64.b64encode(self._convert_to_bytes()) - std_base64chars = ( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" - ) - self.state = bytes( - str(x)[2:-1].translate(str(x)[2:-1].maketrans(std_base64chars, custom)), - "utf-8", + data = self._convert_to_bytes() + alphabet = alphabet.strip() + + char_set = expand_alpha_range( + _Base64.base_64_chars.get(alphabet, alphabet), join_by="" + ) + if len(char_set) < 63 or len(char_set) > 66: # pragma: no cover + raise ValueError( + "Invalid base64 chars. Should be 63-66 chars. 
" + str(len(char_set)) ) - else: - self.state = base64.b64encode(self._convert_to_bytes()) + + self.state = _Base64.encode_base64(data, alphabet=char_set) return self @ChepyDecorators.call_stack def from_base64( - self, custom: str = None, url_safe: bool = False, remove_whitespace: bool = True + self, alphabet: str = "standard", remove_non_alpha: bool = True ) -> DataFormatT: """Decode as Base64 @@ -545,9 +543,9 @@ def from_base64( into an ASCII Base64 string. Args: - custom (str, optional): Provide a custom charset to base64 with - url_safe (bool, optional): If true, decode url safe. Defaults to False - remove_whitespace(bool, optional): If true, all whitespaces are removed + alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64 + remove_whitespace(bool, optional): If true, all whitespaces are removed (Defaults to True) + remove_non_alpha(bool, optional): If true, all whitespaces are removed. (Defaults to True) Returns: Chepy: The Chepy object. @@ -555,23 +553,36 @@ def from_base64( Examples: Base64 decode using a custom string >>> c = Chepy("QqxhNG/mMKtYPqoz64FVR42=") - >>> c.from_base64(custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") + >>> c.from_base64(alphabet="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz") >>> c.out b"some random? data" """ - if remove_whitespace: - data = self.remove_whitespace().o - data = self._convert_to_str() - if custom is not None: - std_base64chars = ( - "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" + alphabet = alphabet.strip() + char_set = expand_alpha_range( + _Base64.base_64_chars.get(alphabet, alphabet), join_by="" + ) + if len(char_set) < 63 or len(char_set) > 65: # pragma: no cover + raise ValueError( + "Invalid base64 chars. Should be 63-65 chars. 
" + str(len(char_set)) ) - data = data.translate(str.maketrans(custom, std_base64chars)) - data += "==" - if url_safe: - self.state = base64.urlsafe_b64decode(data) - else: - self.state = base64.b64decode(data) + + data = self._convert_to_str() + + if remove_non_alpha: + data = re.sub("[^" + char_set + "]", "", data) + + # if is_standard or alphabet == 'url_safe': + # data += "==" + padding_needed = len(data) % 4 + if padding_needed and alphabet != "url_safe": + data += "=" * (4 - padding_needed) + + # if is_standard: + # self.state = base64.b64decode(data) + # if alphabet == 'url_safe': + # self.state = base64.urlsafe_b64decode(data) + # else: + self.state = _Base64.decode_base64(data, char_set) return self @ChepyDecorators.call_stack diff --git a/chepy/modules/dataformat.pyi b/chepy/modules/dataformat.pyi index f2b2906..aece364 100644 --- a/chepy/modules/dataformat.pyi +++ b/chepy/modules/dataformat.pyi @@ -28,8 +28,8 @@ class DataFormat(ChepyCore): def to_int(self: DataFormatT) -> DataFormatT: ... def to_bytes(self: DataFormatT) -> DataFormatT: ... def from_bytes(self: DataFormatT) -> DataFormatT: ... - def to_base64(self: DataFormatT, custom: str=...) -> DataFormatT: ... - def from_base64(self: DataFormatT, custom: str=..., url_safe: bool=..., remove_whitespace: bool=True) -> DataFormatT: ... + def to_base64(self: DataFormatT, alphabet: Literal[str, 'standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']='standard') -> DataFormatT: ... + def from_base64(self: DataFormatT, alphabet: Literal[str, 'standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']='standard', remove_non_alpha: bool=True) -> DataFormatT: ... def decode_bytes(self: DataFormatT, errors: Literal['ignore', 'backslashreplace', 'replace']=...) -> DataFormatT: ... def to_hex(self: DataFormatT, delimiter: str=..., join_by: str=...) -> DataFormatT: ... 
def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='', replace: Union[bytes, None]=b'%|0x') -> DataFormatT: ... @@ -71,7 +71,7 @@ class DataFormat(ChepyCore): def swap_strings(self: DataFormatT, by:int) -> DataFormatT: ... def to_string(self: DataFormatT) -> DataFormatT: ... def stringify(self: DataFormatT, compact:bool=...) -> DataFormatT: ... - def select(self: DataFormatT, start: int, end: int=None) -> DataFormatT: ... + def select(self: DataFormatT, start: int, end: Union[None, int]=None) -> DataFormatT: ... def length(self: DataFormatT) -> DataFormatT: ... def to_leetcode(self: DataFormatT, replace_space: str=...) -> DataFormatT: ... def substitute(self: DataFormatT, x: str=..., y: str=...) -> DataFormatT: ... diff --git a/chepy/modules/internal/helpers.py b/chepy/modules/internal/helpers.py index 76f0b04..c4dcb29 100644 --- a/chepy/modules/internal/helpers.py +++ b/chepy/modules/internal/helpers.py @@ -1,5 +1,6 @@ from typing import List, Union import binascii +import regex as re class Base45: @@ -377,3 +378,158 @@ def rotate_left_carry(self): result[-1] |= carryBits return b"".join([chr(x).encode() for x in result]) + + +class _Base64: + base_64_chars = { + "standard": "A-Za-z0-9+/=", + "url_safe": "A-Za-z0-9-_", + "filename_safe": "A-Za-z0-9+\\-=", + "itoa64": "./0-9A-Za-z=", + "xml": "A-Za-z0-9_.", + # "y64": "A-Za-z0-9._-", + "z64": "0-9a-zA-Z+/=", + "radix_64": "0-9A-Za-z+/=", + # "uuencoding": " -_", + "xxencoding": "+\\-0-9A-Za-z", + # "binHex": "!-,-0-689@A-NP-VX-Z[`a-fh-mp-r", + "rot13": "N-ZA-Mn-za-m0-9+/=", + "unix_crypt": "./0-9A-Za-z", + # "atom128": "/128GhIoPQROSTeUbADfgHijKLM+n0pFWXY456xyzB7=39VaqrstJklmNuZvwcdEC", + # "megan35": "3GHIJKLMNOPQRSTUb=cdefghijklmnopWXYZ/12+406789VaqrstuvwxyzABCDEF5", + # "zong22": "ZKj9n+yf0wDVX1s/5YbdxSo=ILaUpPBCHg8uvNO4klm6iJGhQ7eFrWczAMEq3RTt2", + # "hazz15": "HNO4klm6ij9n+J2hyf0gzA8uvwDEq3X1Q7ZKeFrWcVTts/MRGYbdxSo=ILaUpPBC5", + } + + @staticmethod + def decode_base64(data, 
alphabet): + output = [] + i = 0 + + # Calculate the necessary padding + padding_required = (4 - len(data) % 4) % 4 + data += padding_required * "=" + + while i < len(data): + enc1 = alphabet.index(data[i]) if i < len(data) and data[i] != "=" else 0 + i += 1 + enc2 = alphabet.index(data[i]) if i < len(data) and data[i] != "=" else 0 + i += 1 + enc3 = alphabet.index(data[i]) if i < len(data) and data[i] != "=" else 0 + i += 1 + enc4 = alphabet.index(data[i]) if i < len(data) and data[i] != "=" else 0 + i += 1 + + chr1 = (enc1 << 2) | (enc2 >> 4) + chr2 = ((enc2 & 15) << 4) | (enc3 >> 2) + chr3 = ((enc3 & 3) << 6) | enc4 + + if 0 <= chr1 < 256: + output.append(chr1) + if 0 <= chr2 < 256 and data[i - 2] != "=": + output.append(chr2) + if 0 <= chr3 < 256 and data[i - 1] != "=": + output.append(chr3) + + return bytes(output) + + @staticmethod + def encode_base64(data: bytes, alphabet: str): + output = "" + i = 0 + padding_char = ( + "=" if alphabet[-1] == "=" else None + ) # Check if '=' is in the alphabet, otherwise use None + + while i < len(data): + chr1 = data[i] if i < len(data) else 0 + i += 1 + chr2 = data[i] if i < len(data) else 0 + i += 1 + chr3 = data[i] if i < len(data) else 0 + i += 1 + + enc1 = chr1 >> 2 + enc2 = ((chr1 & 3) << 4) | (chr2 >> 4) + enc3 = ((chr2 & 15) << 2) | (chr3 >> 6) + enc4 = chr3 & 63 + + if i > len(data) + 1: + enc3 = 64 + enc4 = 64 + elif i > len(data): + enc4 = 64 + + output += alphabet[enc1] + output += alphabet[enc2] + output += ( + alphabet[enc3] + if enc3 < 64 + else (padding_char if padding_char is not None else "") + ) + output += ( + alphabet[enc4] + if enc4 < 64 + else (padding_char if padding_char is not None else "") + ) + + # Remove padding characters if they are not part of the alphabet + if padding_char is None: + output = output.rstrip( + alphabet[-1] + ) # Strip the last character of the alphabet if it's not '=' + + return output + + +def expand_alpha_range(alph_str: str, join_by: Union[str, None] = None): + def 
expand_range(start, end): + return [str(x) for x in range(int(start), int(end) + 1)] + + def expand_char_range(start, end): + return [chr(x) for x in range(ord(start), ord(end) + 1)] + + hold = [] + i = 0 + length = len(alph_str) + + while i < length: + # Check for numeric ranges + if ( + i < length - 2 + and alph_str[i].isdigit() + and alph_str[i + 1] == "-" + and alph_str[i + 2].isdigit() + ): + start = "" + while i < length and alph_str[i].isdigit(): + start += alph_str[i] + i += 1 + i += 1 # Skip the '-' + end = "" + while i < length and alph_str[i].isdigit(): + end += alph_str[i] + i += 1 + hold.extend(expand_range(start, end)) + elif ( + i < length - 2 + and alph_str[i].isalpha() + and alph_str[i + 1] == "-" + and alph_str[i + 2].isalpha() + ): + start = alph_str[i] + end = alph_str[i + 2] + hold.extend(expand_char_range(start, end)) + i += 3 + elif ( + i < length - 2 and alph_str[i] == "\\" and alph_str[i + 1] == "-" + ): # pragma: no cover + hold.append("-") + i += 2 + else: + hold.append(alph_str[i]) + i += 1 + + if join_by is not None: + return join_by.join(hold) + return hold diff --git a/chepy/modules/utils.py b/chepy/modules/utils.py index ed881b3..1d98344 100644 --- a/chepy/modules/utils.py +++ b/chepy/modules/utils.py @@ -3,6 +3,7 @@ import difflib from collections import OrderedDict from typing import TypeVar, Union, Any +from .internal.helpers import expand_alpha_range as _ex_al_range import chepy.modules.internal.colors as _int_colors @@ -831,41 +832,5 @@ def expand_alpha_range(self, join_by: Union[str, None] = None): Chepy: The Chepy object. 
""" alph_str = self._convert_to_str() - hold = [] - - def expand_range(start, end): - return [str(x) for x in range(int(start), int(end) + 1)] - - def expand_char_range(start, end): - return [chr(x) for x in range(ord(start), ord(end) + 1)] - - hold = [] - i = 0 - length = len(alph_str) - - while i < length: - # Match numerical ranges like 10-20 - num_match = re.match(r"(\d+)-(\d+)", alph_str[i:]) - if num_match: - start, end = num_match.groups() - hold.extend(expand_range(start, end)) - i += len(start) + len(end) + 1 # move past the number range - elif i < length - 2 and alph_str[i + 1] == "-" and alph_str[i] != "\\": - # Handle character ranges like a-z - start = alph_str[i] - end = alph_str[i + 2] - hold.extend(expand_char_range(start, end)) - i += 2 - elif ( - i < length - 2 and alph_str[i] == "\\" and alph_str[i + 1] == "-" - ): # pragma: no cover - hold.append("-") - i += 1 - else: - hold.append(alph_str[i]) - i += 1 - - if join_by is not None: - hold = join_by.join(hold) - self.state = hold + self.state = _ex_al_range(alph_str=alph_str, join_by=join_by) return self diff --git a/chepy/modules/utils.pyi b/chepy/modules/utils.pyi index c1ab35d..b55ecae 100644 --- a/chepy/modules/utils.pyi +++ b/chepy/modules/utils.pyi @@ -41,4 +41,4 @@ class Utils(ChepyCore): def drop_bytes(self: UtilsT, start: int, length: int) -> UtilsT: ... def without(self: UtilsT, *values: Any) -> UtilsT: ... def pick(self: UtilsT, *values: Any) -> UtilsT: ... - def expand_alpha_range(self, join_by: Union[str, None]=None) -> UtilsT: ... + def expand_alpha_range(self: UtilsT, join_by: Union[str, None]=None) -> UtilsT: ... 
diff --git a/tests/test_core.py b/tests/test_core.py index 4ea5c81..ea3dfc1 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -185,7 +185,7 @@ def test_run_recipe(): recipes=[ { "function": "from_base64", - "args": {"custom": None}, + "args": {"alphabet": 'standard'}, }, {"function": "swap_case", "args": {}}, ] @@ -197,9 +197,11 @@ def test_run_recipe(): def test_recipe(): temp = str(Path(tempfile.gettempdir()) / os.urandom(24).hex()) - Chepy("tests/files/encoding").load_file().reverse().rot_13().from_base64( + Chepy( + "tests/files/encoding" + ).load_file().reverse().rot_13().from_base64().from_base32( remove_whitespace=False - ).from_base32(remove_whitespace=False).str_from_hexdump().save_recipe(temp) + ).str_from_hexdump().save_recipe(temp) assert ( Chepy("tests/files/encoding").load_recipe(temp).o @@ -350,5 +352,7 @@ def test_dump_json(): b"byte_key": b"\x00\x01", } - assert Chepy(data).dump_json().json_to_dict().get_by_key('byte_key').o == b'\x00\x01' - assert Chepy(True).dump_json().o == b'true' \ No newline at end of file + assert ( + Chepy(data).dump_json().json_to_dict().get_by_key("byte_key").o == b"\x00\x01" + ) + assert Chepy(True).dump_json().o == b"true" diff --git a/tests/test_dataformat.py b/tests/test_dataformat.py index ba9e629..1c76725 100644 --- a/tests/test_dataformat.py +++ b/tests/test_dataformat.py @@ -117,28 +117,28 @@ def test_base32(): def test_to_base64(): assert Chepy("some data").to_base64().out.decode() == "c29tZSBkYXRh" + assert Chepy("test").to_base64(alphabet="url_safe").o == b"dGVzdA" assert ( - Chepy("some random? data") - .to_base64( - custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=" - ) - .o + Chepy("some random? 
data").to_base64(alphabet="itoa64").o == b"QqxhNG/mMKtYPqoz64FVR42=" ) - assert Chepy("test").to_base64(url_safe=True).o == b"dGVzdA" + data = "some random data" + assert Chepy(data).to_base64("rot13").o == b"p29gMFOlLJ5xo20tMTS0LD==" + assert Chepy(data).to_base64("radix_64").o == b"SszjPI1oOMvaRsqWP65qOG==" + assert Chepy(data).to_base64("xml").o == b"c29tZSByYW5kb20gZGF0YQ" + assert Chepy(data).to_base64("xxencoding").o == b"QqxhNG-mMKtYPqoUN43oME" + assert Chepy(data).to_base64("filename_safe").o == b"c29tZSByYW5kb20gZGF0YQ==" + assert Chepy(data).to_base64("z64").o == b"sSZJpi1OomVArSQwp65Qog==" + assert Chepy(data).to_base64("unix_crypt").o == b"QqxhNG/mMKtYPqoUN43oME" def test_from_base64(): assert Chepy("c29tZSByYW5kb20/IGRhdGE").from_base64().o == b"some random? data" assert ( - Chepy("QqxhNG/mMKtYPqoz64FVR42=") - .from_base64( - custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz=" - ) - .o + Chepy("QqxhNG/mMKtYPqoz64FVR42=").from_base64(alphabet="itoa64").o == b"some random? 
data" ) - assert Chepy("dGVzdA").from_base64(url_safe=True).o == b"test" + assert Chepy("dGVzdA").from_base64(alphabet="url_safe").o == b"test" assert ( Chepy("""YW1hdGV1cnNDVEZ7cGljMF93NDVuN19nMDBkX24wdWdoXzUwX2lfNzAwa19zb20zX2NyMzR0MXYz X2wxYjNydDEzNV9hZGU4ODIwZX0=""") @@ -146,6 +146,14 @@ def test_from_base64(): .o == b"amateursCTF{pic0_w45n7_g00d_n0ugh_50_i_700k_som3_cr34t1v3_l1b3rt135_ade8820e}" ) + assert Chepy("q6lIr6YwtSZOr6g=").from_base64("z64").o == b"hello world" + assert Chepy("nTIfoT8tq29loTD=").from_base64("rot13").o == b"hello world" + data = b"some random data" + assert Chepy("c29tZSByYW5kb20gZGF0YQ").from_base64("xml").o == data + assert Chepy("QqxhNG-mMKtYPqoUN43oME").from_base64("xxencoding").o == data + assert Chepy("c29tZSByYW5kb20gZGF0YQ==").from_base64("filename_safe").o == data + assert Chepy("sSZJpi1OomVArSQwp65Qog==").from_base64("z64").o == data + assert Chepy("QqxhNG/mMKtYPqoUN43oME").from_base64("unix_crypt").o == data def test_decode_bytes():