Skip to content

Commit

Permalink
Add typing support
Browse files Browse the repository at this point in the history
  • Loading branch information
bblommers committed Dec 6, 2023
1 parent e951e6a commit c473ae9
Show file tree
Hide file tree
Showing 25 changed files with 483 additions and 439 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
run: |
black --check .
flake8 py_partiql_parser/
mypy
test:
name: Unit test
Expand Down
33 changes: 29 additions & 4 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
[mypy]
python_version = 3.8
warn_return_any = False
warn_unused_configs = True
disallow_untyped_calls = True
files= py_partiql_parser, tests
show_column_numbers=True
show_error_codes = True

disallow_any_unimported=True
disallow_any_expr=False
disallow_any_decorated=True
disallow_any_explicit=False
disallow_any_generics=True
disallow_subclassing_any=True

disallow_untyped_calls=True
disallow_untyped_defs=True
disallow_incomplete_defs=True
check_untyped_defs=True
disallow_untyped_decorators=True

no_implicit_optional=True
strict_optional=True

warn_redundant_casts=True
warn_unused_ignores=True
warn_no_return=True
warn_return_any=False
warn_unreachable=True

strict_equality=True
ignore_missing_imports=True
follow_imports=silent
4 changes: 2 additions & 2 deletions py_partiql_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@


from ._internal.parser import DynamoDBStatementParser, S3SelectParser # noqa
from ._internal.json_parser import MissingVariable, SelectEncoder # noqa
from ._internal.json_parser import SelectEncoder # noqa
from ._internal.csv_converter import csv_to_json # noqa
from ._internal.utils import QueryMetadata # noqa
from ._internal.utils import MissingVariable, QueryMetadata # noqa
59 changes: 0 additions & 59 deletions py_partiql_parser/_internal/case_insensitive_dict.py

This file was deleted.

12 changes: 6 additions & 6 deletions py_partiql_parser/_internal/clause_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Optional, AnyStr, List
from typing import Optional, List


class ClauseTokenizer:
def __init__(self, from_clause) -> None:
def __init__(self, from_clause: str):
self.token_list = from_clause
self.token_pos = 0

def current(self):
def current(self) -> Optional[str]:
"""
Returns the current char - or None
"""
Expand All @@ -15,7 +15,7 @@ def current(self):
except IndexError:
return None

def next(self) -> Optional[AnyStr]:
def next(self) -> Optional[str]:
"""
Returns the next token - or None
:return:
Expand All @@ -27,13 +27,13 @@ def next(self) -> Optional[AnyStr]:
except IndexError:
return None

def peek(self):
def peek(self) -> Optional[str]:
try:
return self.token_list[self.token_pos + 1]
except IndexError:
return None

def revert(self):
def revert(self) -> None:
self.token_pos -= 1

def skip_white_space(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion py_partiql_parser/_internal/csv_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List


def csv_to_json(input: str, headers_included=False) -> str:
def csv_to_json(input: str, headers_included: bool = False) -> str:
output = ""
headers: List[str] = []
for line in input.split("\n"):
Expand Down
38 changes: 15 additions & 23 deletions py_partiql_parser/_internal/from_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict

from .clause_tokenizer import ClauseTokenizer
from .json_parser import JsonParser
Expand All @@ -8,10 +8,7 @@


class FromParser:
def __init__(self):
self.clauses = None

def parse(self, from_clause) -> Dict[str, str]:
def __init__(self, from_clause: str):
"""
Parse a FROM-clause in a PARTIQL query
:param from_clause: a string of format `a AS b, x AS y` where `a` and `x` can contain commas
Expand All @@ -20,7 +17,7 @@ def parse(self, from_clause) -> Dict[str, str]:
clauses: Dict[str, Any] = dict()
section = None # NAME/AS/ALIAS
current_phrase = ""
name = alias = None
name = None
from_clause_parser = ClauseTokenizer(from_clause)
while True:
c = from_clause_parser.next()
Expand All @@ -41,7 +38,6 @@ def parse(self, from_clause) -> Dict[str, str]:
current_phrase = ""
section = "AS"
elif section == "ALIAS":
alias = current_phrase # noqa
current_phrase = ""
section = "NAME"
continue
Expand Down Expand Up @@ -76,11 +72,10 @@ def parse(self, from_clause) -> Dict[str, str]:

# {alias: full_name_of_table_or_file}
self.clauses = clauses
return clauses


class S3FromParser(FromParser):
def get_source_data(self, documents: Dict[str, str]):
def get_source_data(self, documents: Dict[str, str]) -> Any:
from_alias = list(self.clauses.keys())[0].lower()
from_query = list(self.clauses.values())[0].lower()
if "." in from_query:
Expand All @@ -105,7 +100,7 @@ def get_source_data(self, documents: Dict[str, str]):
else:
return source_data

def _get_nested_source_data(self, documents: Dict[str, str]):
def _get_nested_source_data(self, documents: Dict[str, Any]) -> Any:
"""
Our FROM-clauses are nested, meaning we need to dig into the provided document to return the key that we need
--> FROM s3object.name as name
Expand All @@ -122,14 +117,17 @@ def _get_nested_source_data(self, documents: Dict[str, str]):
key_so_far.append(key)
key_has_asterix = key.endswith("[*]") and key[0:-3] in source_data
new_key = key[0:-3] if key_has_asterix else key
if iterate_over_docs and isinstance(source_data, list):
if iterate_over_docs and isinstance(source_data, list): # type: ignore[unreachable]
# The previous key ended in [*]
# Iterate over all docs in the result, and only return the requested source key
if key_so_far == entire_key:
if key_so_far == entire_key: # type: ignore[unreachable]
# If we have an alias, we have to use that instead of the original name
source_data = [{alias: doc.get(new_key, {})} for doc in source_data]
else:
source_data = [doc.get_original(new_key, {}) for doc in source_data]
source_data = [
doc.get_original(new_key) or CaseInsensitiveDict({})
for doc in source_data
]
else:
# The previous key was a regular key
# Assume that the result consists of a singular JSON document
Expand All @@ -142,8 +140,8 @@ def _get_nested_source_data(self, documents: Dict[str, str]):
# AWS behaviour when the root-document is a list
source_data = {"_1": source_data}
elif key_so_far == entire_key:
if isinstance(source_data, list):
source_data = [{alias: doc} for doc in source_data]
if isinstance(source_data, list): # type: ignore[unreachable]
source_data = [{alias: doc} for doc in source_data] # type: ignore[unreachable]
else:
source_data = {alias: source_data}
else:
Expand All @@ -156,8 +154,8 @@ def _get_nested_source_data(self, documents: Dict[str, str]):


class DynamoDBFromParser(FromParser):
def parse(self, from_clause) -> Dict[str, str]:
super().parse(from_clause)
def __init__(self, from_clause: str):
super().__init__(from_clause)

for alias, table_name in list(self.clauses.items()):
if table_name[0].isnumeric():
Expand All @@ -167,9 +165,3 @@ def parse(self, from_clause) -> Dict[str, str]:

if table_name[0] == '"' and table_name[-1] == '"':
self.clauses[alias] = table_name[1:-1]

return self.clauses

def get_source_data(self, documents: Dict[str, List[Dict[str, Any]]]):
list_of_json_docs = documents[list(self.clauses.values())[0].lower()]
return [CaseInsensitiveDict(doc) for doc in list_of_json_docs]
59 changes: 14 additions & 45 deletions py_partiql_parser/_internal/json_parser.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,32 @@
from json import JSONEncoder
from typing import Dict, Any, List, Union
from typing import Any, List, Optional

from .clause_tokenizer import ClauseTokenizer
from .case_insensitive_dict import CaseInsensitiveDict
from .utils import CaseInsensitiveDict, Variable

ACCEPTED_QUOTES = ["'", '"', "’"]
NEW_LINE = "\n"


class Variable:
    """A (possibly dotted) variable reference parsed out of a PARTIQL query.

    The literal strings "null", "true" and "false" are normalized to the
    Python values None, True and False on construction.
    """

    def __init__(self, value: Any) -> None:
        self.value = value
        if value == "null":
            self.value = None
        elif isinstance(value, str) and value.lower() in ["true", "false"]:
            # BUG FIX: the original used bool(value), but bool("false") is
            # True because any non-empty string is truthy. Compare instead.
            self.value = value.lower() == "true"

    def __repr__(self) -> str:
        return f"<{self.value}>"

    def __hash__(self) -> int:
        return hash(self.value)

    def __eq__(self, other: Any) -> bool:
        # Wrap in bool() so comparisons always yield True/False; the original
        # could return None (falsy, but not a bool) when `other` was None.
        return bool(other and isinstance(other, Variable) and self.value == other.value)

    def apply(self, value: Any) -> Any:
        """Resolve this variable against `value`.

        For dicts, the (dot-separated) name is followed key by key;
        a MissingVariable sentinel is returned when a key is absent.
        Non-dict values are returned unchanged.
        """
        if isinstance(value, dict):
            split_value = (
                self.value.split(".") if isinstance(self.value, str) else [self.value]
            )
            current_key = split_value[0]
            if current_key not in value:
                return MissingVariable()
            remaining_keys = ".".join(split_value[1:])
            return Variable(remaining_keys).apply(value[current_key])
        else:
            return value


class MissingVariable(Variable):
    """Sentinel Variable returned by Variable.apply when a key is absent.

    Carries value=None, so it serializes to null like an explicit null value.
    """

    def __init__(self) -> None:
        super().__init__(value=None)


class JsonParser:
"""
Input can be a multiple documents, separated by a new-line (\n) characters
So we can't use the builtin JSON parser
"""

def parse(self, original, tokenizer=None, only_parse_initial=False) -> Any:
def parse(
self,
original: str,
tokenizer: Optional[ClauseTokenizer] = None,
only_parse_initial: bool = False,
) -> Any:
if not (original.startswith("{") or original.startswith("[")):
# Doesn't look like JSON - let's return as a variable
return original if original.isnumeric() else Variable(original)
section = None # DICT_KEY | KEY_TO_VALUE | DICT_VAL | OBJECT_END
dict_key = None
section: Optional[str] = None # DICT_KEY | KEY_TO_VALUE | DICT_VAL | OBJECT_END
dict_key = ""
current_phrase = ""
result: Dict[Any, Any] = CaseInsensitiveDict()
result = CaseInsensitiveDict()
tokenizer = tokenizer or ClauseTokenizer(original)
while True:
c = tokenizer.next()
Expand Down Expand Up @@ -160,8 +129,8 @@ def parse(self, original, tokenizer=None, only_parse_initial=False) -> Any:

return result

def _parse_list(self, original, tokenizer) -> Any:
result: List[Union[Any, Dict]] = list()
def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
result: List[Any] = list()
section = None
current_phrase = ""
while True:
Expand Down Expand Up @@ -212,7 +181,7 @@ def _parse_list(self, original, tokenizer) -> Any:


class SelectEncoder(JSONEncoder):
def default(self, o):
def default(self, o: Any) -> Any:
if isinstance(o, Variable) and o.value is None:
return None
if isinstance(o, CaseInsensitiveDict):
Expand Down
Loading

0 comments on commit c473ae9

Please sign in to comment.