🗓 Jun 25, 2024 11:42:29 PM

🐙 improve get_by_key to handle * 🔥 deprecate get_by_key 🔧 fix pytest warnings on regex strings
securisec · Jun 26, 2024 · a4ab8bc · a4ab8bc
1 parent ae1de75
commit a4ab8bc
Show file tree

Hide file tree

Showing 10 changed files with 66 additions and 56 deletions.
diff --git a/chepy/core.py b/chepy/core.py
@@ -715,14 +715,23 @@ def _get_nested_value(self, data, key, split_by="."):
             return data[key]
         try:
             keys = key.split(split_by)
-            for k in keys:
-                if "[" in k:
+            for key in keys:
+                if "[" in key:
                     # Extract the key and index
-                    k, index_str = k.split("[")
-                    index = int(index_str.rstrip("]"))
-                    data = data[k][index]
+                    key, index_str = key.split("[")
+                    index_str = index_str.rstrip("]").strip()
+                    if index_str == "*":
+                        data = [data[key][i] for i in range(len(data[key]))]
+                    else:
+                        index = int(index_str)
+                        data = data[key][index]
                 else:
-                    data = data[k]
+                    if isinstance(data, list):
+                        data = [
+                            data[i][key] for i in range(len(data)) if key in data[i]
+                        ]
+                    else:
+                        data = data[key] if key in data else data
             return data
         except Exception as e:  # pragma: no cover
             self._error_logger(e)
@@ -1515,7 +1524,7 @@ def register(
 
         Examples:
             >>> c = Chepy("hello world")
-            >>> c.register(r"(hello)\s(world)")
+            >>> c.register("(hello)\\s(world)")
             >>> c._registers
             {'$R0': 'hello', '$R1': 'world'}
         """

diff --git a/chepy/modules/dataformat.py b/chepy/modules/dataformat.py
@@ -158,28 +158,6 @@ def dict_to_json(self) -> DataFormatT:
         self.state = json.dumps(self.state)
         return self
 
-    @ChepyDecorators.call_stack
-    def dict_get_items(self, *keys: str) -> DataFormatT:
-        """Get items from a dict. If no keys are specified, it will return all items.
-
-        Returns:
-            Chepy: The Chepy object.
-
-        Examples:
-            >>> o = Chepy({"a": 1, "b": 2}).dict_get_items("a", "b", "c").o
-            [1, 2]
-        """
-        assert isinstance(self.state, dict), "Not a dict object"
-        if len(keys) == 0:
-            self.state = list(self.state.values())
-            return self
-        o = list()
-        for k in keys:
-            if self.state.get(k):
-                o.append(self.state.get(k))
-        self.state = o
-        return self
-
     @ChepyDecorators.call_stack
     def yaml_to_json(self) -> DataFormatT:  # pragma: no cover
         """Convert yaml to a json string
@@ -610,12 +588,18 @@ def to_hex(self, delimiter: str = "") -> DataFormatT:
         return self
 
     @ChepyDecorators.call_stack
-    def from_hex(self, delimiter: str = None, join_by: str = "") -> DataFormatT:
+    def from_hex(
+        self,
+        delimiter: str = None,
+        join_by: str = "",
+        replace: Union[bytes, None] = b"%|0x",
+    ) -> DataFormatT:
         """Convert a non delimited hex string to string
 
         Args:
             delimiter (str, optional): Delimiter. Defaults to None.
             join_by (str, optional): Join by. Defaults to ' '.
+            replace (Union[bytes, None], optional): Regex pattern to replace hex string prefixes. Defaults to b'%|0x'.
 
         Returns:
             Chepy: The Chepy object.
@@ -625,6 +609,9 @@ def from_hex(self, delimiter: str = None, join_by: str = "") -> DataFormatT:
             b"AAA"
         """
         data = self._convert_to_bytes()
+        if replace is not None:
+            replace = self._str_to_bytes(replace)
+            data = re.sub(replace, b"", data)
         if delimiter is None:
             delimiter = detect_delimiter(data, default_delimiter=None)
         if delimiter is not None:

diff --git a/chepy/modules/dataformat.pyi b/chepy/modules/dataformat.pyi
@@ -14,7 +14,6 @@ class DataFormat(ChepyCore):
     def join(self: DataFormatT, join_by: Union[str, bytes]=...) -> DataFormatT: ...
     def json_to_dict(self: DataFormatT) -> DataFormatT: ...
     def dict_to_json(self: DataFormatT) -> DataFormatT: ...
-    def dict_get_items(self: DataFormatT, *keys: str) -> DataFormatT: ...
     def yaml_to_json(self: DataFormatT) -> DataFormatT: ...
     def json_to_yaml(self: DataFormatT) -> DataFormatT: ...
     def to_base58(self: DataFormatT) -> DataFormatT: ...
@@ -32,7 +31,7 @@ class DataFormat(ChepyCore):
     def from_base64(self: DataFormatT, custom: str=..., url_safe: bool=..., remove_whitespace: bool=True) -> DataFormatT: ...
     def decode_bytes(self: DataFormatT, errors: Literal['ignore', 'backslashreplace', 'replace']=...) -> DataFormatT: ...
     def to_hex(self: DataFormatT, delimiter: str=..., join_by: str=...) -> DataFormatT: ...
-    def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='') -> DataFormatT: ...
+    def from_hex(self: DataFormatT, delimiter: Union[str, None]=None, join_by: str='', replace: Union[bytes, None]=b'%|0x') -> DataFormatT: ...
     def hex_to_int(self: DataFormatT) -> DataFormatT: ...
     def hex_to_bytes(self: DataFormatT) -> DataFormatT: ...
     def hex_to_str(self: DataFormatT, ignore: bool=...) -> DataFormatT: ...

diff --git a/chepy/modules/extractors.py b/chepy/modules/extractors.py
@@ -80,7 +80,7 @@ def extract_strings(
             __TEXT'
             ...
         """
-        pattern = b"[^\x00-\x1F\x7F-\xFF]{" + str(length).encode() + b",}"
+        pattern = b"[^\x00-\x1f\x7f-\xff]{" + str(length).encode() + b",}"
         matches = re.findall(pattern, self._convert_to_bytes())
         self.state = self._str_to_bytes(join_by).join([m for m in matches])
         return self
@@ -96,15 +96,16 @@ def extract_ips(self, is_binary: bool = False) -> ExtractorsT:
         Returns:
             Chepy: The Chepy object.
         """
-        pattern = b"((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$))"
+        pattern = r"((^\s*((([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5]))\s*$)|(^\s*((([0-9A-Fa-f]{1,4}:){7}([0-9A-Fa-f]{1,4}|:))|(([0-9A-Fa-f]{1,4}:){6}(:[0-9A-Fa-f]{1,4}|((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){5}(((:[0-9A-Fa-f]{1,4}){1,2})|:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3})|:))|(([0-9A-Fa-f]{1,4}:){4}(((:[0-9A-Fa-f]{1,4}){1,3})|((:[0-9A-Fa-f]{1,4})?:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){3}(((:[0-9A-Fa-f]{1,4}){1,4})|((:[0-9A-Fa-f]{1,4}){0,2}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){2}(((:[0-9A-Fa-f]{1,4}){1,5})|((:[0-9A-Fa-f]{1,4}){0,3}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(([0-9A-Fa-f]{1,4}:){1}(((:[0-9A-Fa-f]{1,4}){1,6})|((:[0-9A-Fa-f]{1,4}){0,4}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:))|(:(((:[0-9A-Fa-f]{1,4}){1,7})|((:[0-9A-Fa-f]{1,4}){0,5}:((25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)(\.(25[0-5]|2[0-4]\d|1\d\d|[1-9]?\d)){3}))|:)))(%.+)?\s*$))"
         if is_binary:  # pragma: no cover
             matched = list(
                 filter(lambda x: re.search(pattern, x), self.extract_strings().o)
             )
         else:
             matched = list(
                 filter(
-                    lambda x: re.search(pattern, x), self._convert_to_bytes().split()
+                    lambda x: re.search(pattern.encode(), x),
+                    self._convert_to_bytes().split(),
                 )
             )
         self.state = matched
@@ -127,11 +128,11 @@ def extract_email(self, is_binary: bool = False) -> ExtractorsT:
 
             >>> Chepy("tests/files/test.der").load_file().extract_email(is_binary=True).o
         """
-        pattern = b"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
+        pattern = r"(^[a-zA-Z0-9_.+-]+@[a-zA-Z0-9-]+\.[a-zA-Z0-9-.]+$)"
         if is_binary:
             matched = list(
                 filter(
-                    lambda x: re.search(pattern, x),
+                    lambda x: re.search(pattern.encode(), x),
                     self.extract_strings().o.splitlines(),
                 )
             )
@@ -180,15 +181,16 @@ def extract_urls(self, is_binary: bool = False) -> ExtractorsT:
         Returns:
             Chepy: The Chepy object.
         """
-        pattern = b"(file|ftps?|http[s]?|ssh)://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
+        pattern = r"(file|ftps?|http[s]?|ssh)://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
         if is_binary:  # pragma: no cover
             matched = list(
                 filter(lambda x: re.search(pattern, x), self.extract_strings().o)
             )
         else:
             matched = list(
                 filter(
-                    lambda x: re.search(pattern, x), self._convert_to_bytes().split()
+                    lambda x: re.search(pattern.encode(), x),
+                    self._convert_to_bytes().split(),
                 )
             )
         self.state = matched

diff --git a/chepy/modules/links.py b/chepy/modules/links.py
@@ -41,7 +41,7 @@ def github_to_raw(self) -> LinksT:
             "/blob",
             "",
             re.sub(
-                "(github\.com)(/)",
+                r"(github\.com)(/)",
                 r"raw.githubusercontent.com\2",
                 self._convert_to_str(),
             ),
@@ -62,7 +62,7 @@ def google_search_ei_to_epoch(self) -> LinksT:
         decoded = base64.urlsafe_b64decode(self._convert_to_str())
         timestamp = ord(chr(decoded[0]))
         timestamp += ord(chr(decoded[1])) * 256
-        timestamp += ord(chr(decoded[2])) * 256 ** 2
-        timestamp += ord(chr(decoded[3])) * 256 ** 3
+        timestamp += ord(chr(decoded[2])) * 256**2
+        timestamp += ord(chr(decoded[3])) * 256**3
         self.state = timestamp
         return self
diff --git a/chepy/modules/networking.py b/chepy/modules/networking.py
@@ -179,7 +179,7 @@ def get_ssl_cert(self, port: int = 443) -> NetworkingT:
                 'crlDistributionPoints': ('http://crl.pki.goog/GTS1O1.crl',)
             }
         """
-        domain = re.sub("^\w+://", "", self._convert_to_str())
+        domain = re.sub(r"^\w+://", "", self._convert_to_str())
         with socket.create_connection((domain, port)) as sock:
             context = ssl.create_default_context()
             context.check_hostname = False
@@ -224,4 +224,4 @@ def ip_to_int(self) -> NetworkingT:
             >>> Chepy(3232235777).int_to_ip().o
         """
         self.state = int(ipaddress.ip_address(self._convert_to_str()))
-        return self
+        return self
diff --git a/chepy/modules/search.py b/chepy/modules/search.py
@@ -61,7 +61,7 @@ def search_list(self, pattern: Union[str, bytes]) -> SearchT:
         return self
 
     @ChepyDecorators.call_stack
-    def search_ctf_flags(self, prefix: str, postfix: str = ".+?\{*\}") -> SearchT:
+    def search_ctf_flags(self, prefix: str, postfix: str = ".+?\\{*\\}") -> SearchT:
         """Search CTF style flags.
 
         This by default assumes that the flag format is similar
@@ -70,7 +70,7 @@ def search_ctf_flags(self, prefix: str, postfix: str = ".+?\{*\}") -> SearchT:
         Args:
             prefix (str): Prefix of the flag. Like `picoCTF`
             postfix (str, optional): Regex for the remainder of the flag.
-                Defaults to '.+\{.+}'.
+                Defaults to '.+?\\{*\\}'.
 
         Returns:
             Chepy: The Chepy object.
@@ -107,7 +107,7 @@ def search_slack_webhook(self) -> SearchT:
             Chepy: The Chepy object.
         """
         self.state = re.findall(
-            "https://hooks\.slack\.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}",
+            r"https://hooks\.slack\.com/services/T[a-zA-Z0-9_]{8}/B[a-zA-Z0-9_]{8}/[a-zA-Z0-9_]{24}",
             self._convert_to_str(),
         )
         return self

diff --git a/chepy/modules/utils.py b/chepy/modules/utils.py
@@ -155,7 +155,9 @@ def regex_search(
         if extended:
             flags += re.X
         if is_bytes:
-            self.state = re.findall(self._to_bytes(pattern), self._convert_to_bytes(), flags=flags)
+            self.state = re.findall(
+                self._to_bytes(pattern), self._convert_to_bytes(), flags=flags
+            )
         else:
             self.state = re.findall(pattern, self._convert_to_str(), flags=flags)
         return self
@@ -463,7 +465,7 @@ def strip_ansi(self) -> UtilsT:
             "This is a string"
         """
         self.state = re.sub(
-            "[\u001B\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))",
+            "[\u001b\u009b][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[a-zA-Z\\d]*)*)?\u0007)|(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PRZcf-ntqry=><~]))",
             "",
             self._convert_to_str(),
         )
@@ -481,7 +483,7 @@ def strip(self, pattern: str, ignore_case=True) -> UtilsT:
             Chepy: The Chepy object.
 
         Examples:
-            >>> Chepy("some some data").strip(r"some\s").o
+            >>> Chepy("some some data").strip("some\\s").o
             "data"
         """
         flags = 0
@@ -503,7 +505,7 @@ def find_replace(self, pattern: str, repl: str, ignore_case=True) -> UtilsT:
             Chepy: The Chepy object.
 
         Examples:
-            >>> Chepy("some some data").find_replace(r"some\s", "data").o
+            >>> Chepy("some some data").find_replace("some\\s", "data").o
             "datadatadata"
         """
         flags = 0

diff --git a/tests/test_core.py b/tests/test_core.py
@@ -93,6 +93,11 @@ def test_get_by_key():
         == b"New"
     )
     assert Chepy(data2).get_by_key("menu", split_key=None).o.get("id") == "file"
+    assert Chepy(data2).get_by_key("menu.popup.menuitem[*].value").o == [
+        "New",
+        "Open",
+        "Close",
+    ]
 
 
 def test_delete_state():
@@ -152,7 +157,10 @@ def test_load_file_binary():
 
 def test_show_recipe():
     assert Chepy("4142").from_hex().recipe == [
-        {"function": "from_hex", "args": {"delimiter": None, "join_by": ""}}
+        {
+            "function": "from_hex",
+            "args": {"delimiter": None, "join_by": "", "replace": b"%|0x"},
+        }
     ]
 
 

diff --git a/tests/test_dataformat.py b/tests/test_dataformat.py
@@ -175,6 +175,13 @@ def test_from_hex():
     assert (
         Chepy("41;41;41").from_hex(delimiter=";", join_by="%").out.decode() == "A%A%A"
     )
+    assert (
+        Chepy("%41;41;41").from_hex(delimiter=";", join_by="%").out.decode() == "A%A%A"
+    )
+    assert (
+        Chepy("%41;0x41;%41").from_hex(delimiter=";", join_by="%").out.decode()
+        == "A%A%A"
+    )
 
 
 def test_hex_to_int():
@@ -244,10 +251,6 @@ def test_get_by_index():
     assert Chepy([1, "a", True]).get_by_index(2).state
 
 
-def test_get_by_key():
-    assert Chepy('{"some": "data"}').json_to_dict().get_by_key("some").o == b"data"
-
-
 def test_to_bytes():
     assert Chepy({"some": "val", "kl": 1}).to_bytes().o == b"{'some': 'val', 'kl': 1}"