Skip to content

Commit

Permalink
🗓 Jul 3, 2024 10:46:05 PM
Browse files Browse the repository at this point in the history
🔥 to/from_base64 updated to align to cyberchef
🧪 tests added/updated
🤖 types added/updated
  • Loading branch information
securisec committed Jul 4, 2024
1 parent aae8022 commit d3e7c72
Show file tree
Hide file tree
Showing 8 changed files with 240 additions and 94 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/tests_multi_os.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ jobs:
pip install sphinx recommonmark pytest==8.1.1 pytest-cov==5.0.0 pyperclip
- name: Test with pytest
env:
COVERAGE_CORE: sysmon
run: |
COVERAGE_CORE=sysmon pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
pytest -v --disable-pytest-warnings --cov-report=xml --cov=chepy --cov-config=.coveragerc tests/
coverage report -m
- name: Test plugins osx
Expand Down
81 changes: 46 additions & 35 deletions chepy/modules/dataformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import sqlite3
import collections
from random import randint
import regex as re
from .internal.constants import Encoding
from .internal.helpers import (
detect_delimiter,
Expand All @@ -23,6 +24,8 @@
UUEncoderDecoder,
Base92,
Base45,
_Base64,
expand_alpha_range,
)

yaml = lazy_import.lazy_module("yaml")
Expand Down Expand Up @@ -494,7 +497,7 @@ def from_bytes(self) -> DataFormatT:
return self

@ChepyDecorators.call_stack
def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
def to_base64(self, alphabet: str = "standard") -> DataFormatT:
"""Encode as Base64
Base64 is a notation for encoding arbitrary byte data using a
Expand All @@ -503,8 +506,7 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
into an ASCII Base64 string.
Args:
custom (str, optional): Provide a custom charset to base64 with
url_safe (bool, optional): Encode with url safe charset.
alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
Returns:
Chepy: The Chepy object.
Expand All @@ -515,27 +517,23 @@ def to_base64(self, custom: str = None, url_safe: bool = False) -> DataFormatT:
>>> Chepy("Some data").to_base64(alphabet=custom).o
b'IqxhNG/YMLFV'
"""
if url_safe:
self.state = base64.urlsafe_b64encode(self._convert_to_bytes()).replace(
b"=", b""
)
return self
if custom is not None:
x = base64.b64encode(self._convert_to_bytes())
std_base64chars = (
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
)
self.state = bytes(
str(x)[2:-1].translate(str(x)[2:-1].maketrans(std_base64chars, custom)),
"utf-8",
data = self._convert_to_bytes()
alphabet = alphabet.strip()

char_set = expand_alpha_range(
_Base64.base_64_chars.get(alphabet, alphabet), join_by=""
)
if len(char_set) < 63 or len(char_set) > 66: # pragma: no cover
raise ValueError(
"Invalid base64 chars. Should be 63-66 chars. " + str(len(char_set))
)
else:
self.state = base64.b64encode(self._convert_to_bytes())

self.state = _Base64.encode_base64(data, alphabet=char_set)
return self

@ChepyDecorators.call_stack
def from_base64(
self, custom: str = None, url_safe: bool = False, remove_whitespace: bool = True
self, alphabet: str = "standard", remove_non_alpha: bool = True
) -> DataFormatT:
"""Decode as Base64
Expand All @@ -545,33 +543,46 @@ def from_base64(
into an ASCII Base64 string.
Args:
custom (str, optional): Provide a custom charset to base64 with
url_safe (bool, optional): If true, decode url safe. Defaults to False
remove_whitespace(bool, optional): If true, all whitespaces are removed
alphabet (str, optional): Provide a custom charset to base64 with. Valid values are: filename_safe, itoa64, radix_64, rot13, standard, unix_crypt, url_safe, xml, xxencoding, z64
remove_whitespace(bool, optional): If true, all whitespaces are removed (Defaults to True)
remove_non_alpha(bool, optional): If true, all characters that are not part of the selected alphabet are removed before decoding. (Defaults to True)
Returns:
Chepy: The Chepy object.
Examples:
Base64 decode using a custom string
>>> c = Chepy("QqxhNG/mMKtYPqoz64FVR42=")
>>> c.from_base64(custom="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
>>> c.from_base64(alphabet="./0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
>>> c.out
b"some random? data"
"""
if remove_whitespace:
data = self.remove_whitespace().o
data = self._convert_to_str()
if custom is not None:
std_base64chars = (
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="
alphabet = alphabet.strip()
char_set = expand_alpha_range(
_Base64.base_64_chars.get(alphabet, alphabet), join_by=""
)
if len(char_set) < 63 or len(char_set) > 65: # pragma: no cover
raise ValueError(
"Invalid base64 chars. Should be 63-65 chars. " + str(len(char_set))
)
data = data.translate(str.maketrans(custom, std_base64chars))
data += "=="
if url_safe:
self.state = base64.urlsafe_b64decode(data)
else:
self.state = base64.b64decode(data)

data = self._convert_to_str()

if remove_non_alpha:
data = re.sub("[^" + char_set + "]", "", data)

# if is_standard or alphabet == 'url_safe':
# data += "=="
padding_needed = len(data) % 4
if padding_needed and alphabet != "url_safe":
data += "=" * (4 - padding_needed)

# if is_standard:
# self.state = base64.b64decode(data)
# if alphabet == 'url_safe':
# self.state = base64.urlsafe_b64decode(data)
# else:
self.state = _Base64.decode_base64(data, char_set)
return self

@ChepyDecorators.call_stack
Expand Down
6 changes: 3 additions & 3 deletions chepy/modules/dataformat.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@ class DataFormat(ChepyCore):
def to_int(self: DataFormatT) -> DataFormatT: ...
def to_bytes(self: DataFormatT) -> DataFormatT: ...
def from_bytes(self: DataFormatT) -> DataFormatT: ...
def to_base64(self: DataFormatT, custom: str=...) -> DataFormatT: ...
def from_base64(self: DataFormatT, custom: str=..., url_safe: bool=..., remove_whitespace: bool=True) -> DataFormatT: ...
def to_base64(self: DataFormatT, alphabet: Literal[str, 'standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']='standard') -> DataFormatT: ...
def from_base64(self: DataFormatT, alphabet: Literal[str, 'standard', 'url_safe', 'filename_safe', 'itoa64', 'xml', 'z64', 'radix_64', 'xxencoding', 'rot13', 'unix_crypt']='standard', remove_non_alpha: bool=True) -> DataFormatT: ...
def decode_bytes(self: DataFormatT, errors: Literal['ignore', 'backslashreplace', 'replace']=...) -> DataFormatT: ...
def to_hex(self: DataFormatT, delimiter: str=..., join_by: str=...) -> DataFormatT: ...
def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='', replace: Union[bytes, None]=b'%|0x') -> DataFormatT: ...
Expand Down Expand Up @@ -71,7 +71,7 @@ class DataFormat(ChepyCore):
def swap_strings(self: DataFormatT, by:int) -> DataFormatT: ...
def to_string(self: DataFormatT) -> DataFormatT: ...
def stringify(self: DataFormatT, compact:bool=...) -> DataFormatT: ...
def select(self: DataFormatT, start: int, end: int=None) -> DataFormatT: ...
def select(self: DataFormatT, start: int, end: Union[None, int]=None) -> DataFormatT: ...
def length(self: DataFormatT) -> DataFormatT: ...
def to_leetcode(self: DataFormatT, replace_space: str=...) -> DataFormatT: ...
def substitute(self: DataFormatT, x: str=..., y: str=...) -> DataFormatT: ...
Expand Down
156 changes: 156 additions & 0 deletions chepy/modules/internal/helpers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import List, Union
import binascii
import regex as re


class Base45:
Expand Down Expand Up @@ -377,3 +378,158 @@ def rotate_left_carry(self):
result[-1] |= carryBits

return b"".join([chr(x).encode() for x in result])


class _Base64:
    """Base64 encoder/decoder that works with an arbitrary alphabet.

    The alphabet passed to ``encode_base64`` / ``decode_base64`` is the fully
    expanded character set: positions 0-63 are the symbols for the 64 sextet
    values. ``"="`` is always treated as padding by the decoder, and is
    emitted by the encoder only when it is the last character of the
    alphabet.
    """

    # Named alphabets, written in the compact range notation understood by
    # ``expand_alpha_range`` (``A-Z`` is a range, ``\\-`` is a literal dash).
    base_64_chars = {
        "standard": "A-Za-z0-9+/=",
        "url_safe": "A-Za-z0-9-_",
        "filename_safe": "A-Za-z0-9+\\-=",
        "itoa64": "./0-9A-Za-z=",
        "xml": "A-Za-z0-9_.",
        # "y64": "A-Za-z0-9._-",
        "z64": "0-9a-zA-Z+/=",
        "radix_64": "0-9A-Za-z+/=",
        # "uuencoding": " -_",
        "xxencoding": "+\\-0-9A-Za-z",
        # "binHex": "!-,-0-689@A-NP-VX-Z[`a-fh-mp-r",
        "rot13": "N-ZA-Mn-za-m0-9+/=",
        "unix_crypt": "./0-9A-Za-z",
        # "atom128": "/128GhIoPQROSTeUbADfgHijKLM+n0pFWXY456xyzB7=39VaqrstJklmNuZvwcdEC",
        # "megan35": "3GHIJKLMNOPQRSTUb=cdefghijklmnopWXYZ/12+406789VaqrstuvwxyzABCDEF5",
        # "zong22": "ZKj9n+yf0wDVX1s/5YbdxSo=ILaUpPBCHg8uvNO4klm6iJGhQ7eFrWczAMEq3RTt2",
        # "hazz15": "HNO4klm6ij9n+J2hyf0gzA8uvwDEq3X1Q7ZKeFrWcVTts/MRGYbdxSo=ILaUpPBC5",
    }

    @staticmethod
    def decode_base64(data: str, alphabet: str) -> bytes:
        """Decode ``data`` using the expanded ``alphabet``.

        Args:
            data (str): Encoded text. It is right-padded with ``"="`` to a
                multiple of four characters before decoding.
            alphabet (str): Expanded character set; the index of a character
                is its sextet value.

        Returns:
            bytes: The decoded bytes.

        Raises:
            ValueError: If ``data`` contains a character other than ``"="``
                that is not part of ``alphabet``.
        """
        # Pad so the input always splits into complete 4-character groups.
        data += "=" * (-len(data) % 4)

        # First-occurrence index of every symbol (same result as str.index,
        # without a linear scan per character).
        lookup = {}
        for position, symbol in enumerate(alphabet):
            lookup.setdefault(symbol, position)

        def sextet(symbol):
            # Padding contributes zero bits.
            if symbol == "=":
                return 0
            try:
                return lookup[symbol]
            except KeyError:
                raise ValueError(
                    f"Invalid base64 character: {symbol!r}"
                ) from None

        output = []
        for offset in range(0, len(data), 4):
            sym1, sym2, sym3, sym4 = data[offset : offset + 4]
            enc1 = sextet(sym1)
            enc2 = sextet(sym2)
            enc3 = sextet(sym3)
            enc4 = sextet(sym4)

            chr1 = (enc1 << 2) | (enc2 >> 4)
            chr2 = ((enc2 & 15) << 4) | (enc3 >> 2)
            chr3 = ((enc3 & 3) << 6) | enc4

            # The range guards drop values produced by symbols whose index is
            # above 63 (alphabets longer than 64 characters).
            if 0 <= chr1 < 256:
                output.append(chr1)
            # A padded third/fourth symbol means the byte was never encoded.
            if 0 <= chr2 < 256 and sym3 != "=":
                output.append(chr2)
            if 0 <= chr3 < 256 and sym4 != "=":
                output.append(chr3)

        return bytes(output)

    @staticmethod
    def encode_base64(data: bytes, alphabet: str) -> str:
        """Encode ``data`` using the expanded ``alphabet``.

        Args:
            data (bytes): Raw bytes to encode.
            alphabet (str): Expanded character set. If its last character is
                ``"="`` the output is padded with ``"="``; otherwise the
                output is left unpadded.

        Returns:
            str: The encoded text.
        """
        # Only "=" at the end of the alphabet enables padding.
        padding = "=" if alphabet[-1] == "=" else ""

        pieces = []
        for offset in range(0, len(data), 3):
            chunk = data[offset : offset + 3]
            available = len(chunk)
            byte1 = chunk[0]
            byte2 = chunk[1] if available > 1 else 0
            byte3 = chunk[2] if available > 2 else 0

            pieces.append(alphabet[byte1 >> 2])
            pieces.append(alphabet[((byte1 & 3) << 4) | (byte2 >> 4)])
            # Missing input bytes become padding (or nothing when the
            # alphabet has no padding character).
            pieces.append(
                alphabet[((byte2 & 15) << 2) | (byte3 >> 6)]
                if available > 1
                else padding
            )
            pieces.append(alphabet[byte3 & 63] if available > 2 else padding)

        # NOTE: the result must not be stripped of the alphabet's last
        # character. In an unpadded alphabet that character is the symbol for
        # sextet 63, i.e. real data, and no padding was emitted above.
        return "".join(pieces)


def expand_alpha_range(alph_str: str, join_by: Union[str, None] = None):
    """Expand compact range notation into its individual members.

    Recognised forms, scanned left to right:

    * ``<digits>-<digits>``: inclusive numeric range, e.g. ``0-9`` or
      ``10-13``; each number is emitted as its decimal string.
    * ``<letter>-<letter>``: inclusive range of code points, e.g. ``a-f``.
    * ``\\-``: a literal dash.
    * any other character is emitted unchanged.

    Args:
        alph_str (str): The range expression to expand.
        join_by (Union[str, None], optional): If given, the members are
            joined with this string. Defaults to None.

    Returns:
        A list of strings, or a single string when ``join_by`` is not None.
    """
    length = len(alph_str)

    def digit_run_end(pos):
        # Index just past the run of digits starting at ``pos``.
        while pos < length and alph_str[pos].isdigit():
            pos += 1
        return pos

    hold = []
    i = 0

    while i < length:
        current = alph_str[i]

        if current.isdigit():
            # Look past the whole digit run so multi-digit starts such as
            # "10-13" are treated as one number rather than "1" + "0-13".
            start_stop = digit_run_end(i)
            if (
                start_stop < length - 1
                and alph_str[start_stop] == "-"
                and alph_str[start_stop + 1].isdigit()
            ):
                end_stop = digit_run_end(start_stop + 1)
                low = int(alph_str[i:start_stop])
                high = int(alph_str[start_stop + 1 : end_stop])
                hold.extend(str(number) for number in range(low, high + 1))
                i = end_stop
                continue
        elif (
            current.isalpha()
            and i < length - 2
            and alph_str[i + 1] == "-"
            and alph_str[i + 2].isalpha()
        ):
            last = alph_str[i + 2]
            hold.extend(chr(code) for code in range(ord(current), ord(last) + 1))
            i += 3
            continue
        elif current == "\\" and i < length - 1 and alph_str[i + 1] == "-":
            # Escaped dash; only one character of lookahead is needed, so a
            # trailing "\-" at the very end of the string is handled too.
            hold.append("-")
            i += 2
            continue

        # Plain character (including a digit that does not start a range).
        hold.append(current)
        i += 1

    if join_by is not None:
        return join_by.join(hold)
    return hold
39 changes: 2 additions & 37 deletions chepy/modules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import difflib
from collections import OrderedDict
from typing import TypeVar, Union, Any
from .internal.helpers import expand_alpha_range as _ex_al_range

import chepy.modules.internal.colors as _int_colors

Expand Down Expand Up @@ -831,41 +832,5 @@ def expand_alpha_range(self, join_by: Union[str, None] = None):
Chepy: The Chepy object.
"""
alph_str = self._convert_to_str()
hold = []

def expand_range(start, end):
return [str(x) for x in range(int(start), int(end) + 1)]

def expand_char_range(start, end):
return [chr(x) for x in range(ord(start), ord(end) + 1)]

hold = []
i = 0
length = len(alph_str)

while i < length:
# Match numerical ranges like 10-20
num_match = re.match(r"(\d+)-(\d+)", alph_str[i:])
if num_match:
start, end = num_match.groups()
hold.extend(expand_range(start, end))
i += len(start) + len(end) + 1 # move past the number range
elif i < length - 2 and alph_str[i + 1] == "-" and alph_str[i] != "\\":
# Handle character ranges like a-z
start = alph_str[i]
end = alph_str[i + 2]
hold.extend(expand_char_range(start, end))
i += 2
elif (
i < length - 2 and alph_str[i] == "\\" and alph_str[i + 1] == "-"
): # pragma: no cover
hold.append("-")
i += 1
else:
hold.append(alph_str[i])
i += 1

if join_by is not None:
hold = join_by.join(hold)
self.state = hold
self.state = _ex_al_range(alph_str=alph_str, join_by=join_by)
return self
2 changes: 1 addition & 1 deletion chepy/modules/utils.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,4 @@ class Utils(ChepyCore):
def drop_bytes(self: UtilsT, start: int, length: int) -> UtilsT: ...
def without(self: UtilsT, *values: Any) -> UtilsT: ...
def pick(self: UtilsT, *values: Any) -> UtilsT: ...
def expand_alpha_range(self, join_by: Union[str, None]=None) -> UtilsT: ...
def expand_alpha_range(self: UtilsT, join_by: Union[str, None]=None) -> UtilsT: ...
Loading

0 comments on commit d3e7c72

Please sign in to comment.