Skip to content

Commit

Permalink
Add typing support
Browse files Browse the repository at this point in the history
  • Loading branch information
bblommers committed Dec 6, 2023
1 parent e951e6a commit c473ae9
Show file tree
Hide file tree
Showing 25 changed files with 483 additions and 439 deletions.
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ jobs:
run: |
black --check .
flake8 py_partiql_parser/
mypy
test:
name: Unit test
Expand Down
33 changes: 29 additions & 4 deletions mypy.ini
Original file line number Diff line number Diff line change
@@ -1,5 +1,30 @@
[mypy]
python_version = 3.8
warn_return_any = False
warn_unused_configs = True
disallow_untyped_calls = True
files= py_partiql_parser, tests
show_column_numbers=True
show_error_codes = True

disallow_any_unimported=True
disallow_any_expr=False
disallow_any_decorated=True
disallow_any_explicit=False
disallow_any_generics=True
disallow_subclassing_any=True

disallow_untyped_calls=True
disallow_untyped_defs=True
disallow_incomplete_defs=True
check_untyped_defs=True
disallow_untyped_decorators=True

no_implicit_optional=True
strict_optional=True

warn_redundant_casts=True
warn_unused_ignores=True
warn_no_return=True
warn_return_any=False
warn_unreachable=True

strict_equality=True
ignore_missing_imports=True
follow_imports=silent
4 changes: 2 additions & 2 deletions py_partiql_parser/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,6 @@


from ._internal.parser import DynamoDBStatementParser, S3SelectParser # noqa
from ._internal.json_parser import MissingVariable, SelectEncoder # noqa
from ._internal.json_parser import SelectEncoder # noqa
from ._internal.csv_converter import csv_to_json # noqa
from ._internal.utils import QueryMetadata # noqa
from ._internal.utils import MissingVariable, QueryMetadata # noqa
59 changes: 0 additions & 59 deletions py_partiql_parser/_internal/case_insensitive_dict.py

This file was deleted.

12 changes: 6 additions & 6 deletions py_partiql_parser/_internal/clause_tokenizer.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
from typing import Optional, AnyStr, List
from typing import Optional, List


class ClauseTokenizer:
def __init__(self, from_clause) -> None:
def __init__(self, from_clause: str):
self.token_list = from_clause
self.token_pos = 0

def current(self):
def current(self) -> Optional[str]:
"""
Returns the current char - or None
"""
Expand All @@ -15,7 +15,7 @@ def current(self):
except IndexError:
return None

def next(self) -> Optional[AnyStr]:
def next(self) -> Optional[str]:
"""
Returns the next token - or None
:return:
Expand All @@ -27,13 +27,13 @@ def next(self) -> Optional[AnyStr]:
except IndexError:
return None

def peek(self):
def peek(self) -> Optional[str]:
try:
return self.token_list[self.token_pos + 1]
except IndexError:
return None

def revert(self):
def revert(self) -> None:
self.token_pos -= 1

def skip_white_space(self) -> None:
Expand Down
2 changes: 1 addition & 1 deletion py_partiql_parser/_internal/csv_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List


def csv_to_json(input: str, headers_included=False) -> str:
def csv_to_json(input: str, headers_included: bool = False) -> str:
output = ""
headers: List[str] = []
for line in input.split("\n"):
Expand Down
38 changes: 15 additions & 23 deletions py_partiql_parser/_internal/from_parser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Any, Dict, List
from typing import Any, Dict

from .clause_tokenizer import ClauseTokenizer
from .json_parser import JsonParser
Expand All @@ -8,10 +8,7 @@


class FromParser:
def __init__(self):
self.clauses = None

def parse(self, from_clause) -> Dict[str, str]:
def __init__(self, from_clause: str):
"""
Parse a FROM-clause in a PARTIQL query
:param from_clause: a string of format `a AS b, x AS y` where `a` and `x` can contain commas
Expand All @@ -20,7 +17,7 @@ def parse(self, from_clause) -> Dict[str, str]:
clauses: Dict[str, Any] = dict()
section = None # NAME/AS/ALIAS
current_phrase = ""
name = alias = None
name = None
from_clause_parser = ClauseTokenizer(from_clause)
while True:
c = from_clause_parser.next()
Expand All @@ -41,7 +38,6 @@ def parse(self, from_clause) -> Dict[str, str]:
current_phrase = ""
section = "AS"
elif section == "ALIAS":
alias = current_phrase # noqa
current_phrase = ""
section = "NAME"
continue
Expand Down Expand Up @@ -76,11 +72,10 @@ def parse(self, from_clause) -> Dict[str, str]:

# {alias: full_name_of_table_or_file}
self.clauses = clauses
return clauses


class S3FromParser(FromParser):
def get_source_data(self, documents: Dict[str, str]):
def get_source_data(self, documents: Dict[str, str]) -> Any:
from_alias = list(self.clauses.keys())[0].lower()
from_query = list(self.clauses.values())[0].lower()
if "." in from_query:
Expand All @@ -105,7 +100,7 @@ def get_source_data(self, documents: Dict[str, str]):
else:
return source_data

def _get_nested_source_data(self, documents: Dict[str, str]):
def _get_nested_source_data(self, documents: Dict[str, Any]) -> Any:
"""
Our FROM-clauses are nested, meaning we need to dig into the provided document to return the key that we need
--> FROM s3object.name as name
Expand All @@ -122,14 +117,17 @@ def _get_nested_source_data(self, documents: Dict[str, str]):
key_so_far.append(key)
key_has_asterix = key.endswith("[*]") and key[0:-3] in source_data
new_key = key[0:-3] if key_has_asterix else key
if iterate_over_docs and isinstance(source_data, list):
if iterate_over_docs and isinstance(source_data, list): # type: ignore[unreachable]
# The previous key ended in [*]
# Iterate over all docs in the result, and only return the requested source key
if key_so_far == entire_key:
if key_so_far == entire_key: # type: ignore[unreachable]
# If we have an alias, we have to use that instead of the original name
source_data = [{alias: doc.get(new_key, {})} for doc in source_data]
else:
source_data = [doc.get_original(new_key, {}) for doc in source_data]
source_data = [
doc.get_original(new_key) or CaseInsensitiveDict({})
for doc in source_data
]
else:
# The previous key was a regular key
# Assume that the result consists of a singular JSON document
Expand All @@ -142,8 +140,8 @@ def _get_nested_source_data(self, documents: Dict[str, str]):
# AWS behaviour when the root-document is a list
source_data = {"_1": source_data}
elif key_so_far == entire_key:
if isinstance(source_data, list):
source_data = [{alias: doc} for doc in source_data]
if isinstance(source_data, list): # type: ignore[unreachable]
source_data = [{alias: doc} for doc in source_data] # type: ignore[unreachable]
else:
source_data = {alias: source_data}
else:
Expand All @@ -156,8 +154,8 @@ def _get_nested_source_data(self, documents: Dict[str, str]):


class DynamoDBFromParser(FromParser):
def parse(self, from_clause) -> Dict[str, str]:
super().parse(from_clause)
def __init__(self, from_clause: str):
super().__init__(from_clause)

for alias, table_name in list(self.clauses.items()):
if table_name[0].isnumeric():
Expand All @@ -167,9 +165,3 @@ def parse(self, from_clause) -> Dict[str, str]:

if table_name[0] == '"' and table_name[-1] == '"':
self.clauses[alias] = table_name[1:-1]

return self.clauses

def get_source_data(self, documents: Dict[str, List[Dict[str, Any]]]):
list_of_json_docs = documents[list(self.clauses.values())[0].lower()]
return [CaseInsensitiveDict(doc) for doc in list_of_json_docs]
59 changes: 14 additions & 45 deletions py_partiql_parser/_internal/json_parser.py
Original file line number Diff line number Diff line change
@@ -1,63 +1,32 @@
from json import JSONEncoder
from typing import Dict, Any, List, Union
from typing import Any, List, Optional

from .clause_tokenizer import ClauseTokenizer
from .case_insensitive_dict import CaseInsensitiveDict
from .utils import CaseInsensitiveDict, Variable

ACCEPTED_QUOTES = ["'", '"', "’"]
NEW_LINE = "\n"


class Variable:
    """A (possibly dotted) variable reference parsed out of a PARTIQL query.

    The literal strings "null", "true" and "false" are normalized to the
    Python values None, True and False on construction.
    """

    def __init__(self, value: Any) -> None:
        self.value = value
        if value == "null":
            self.value = None
        elif isinstance(value, str) and value.lower() in ["true", "false"]:
            # BUG FIX: the original used bool(value), but bool("false") is
            # True because any non-empty string is truthy. Compare instead.
            self.value = value.lower() == "true"

    def __repr__(self) -> str:
        return f"<{self.value}>"

    def __hash__(self) -> int:
        return hash(self.value)

    def __eq__(self, other: Any) -> bool:
        # Wrap in bool() so comparisons always yield True/False; the original
        # could return None (falsy, but not a bool) when `other` was None.
        return bool(other and isinstance(other, Variable) and self.value == other.value)

    def apply(self, value: Any) -> Any:
        """Resolve this variable against `value`.

        For dicts, the (dot-separated) name is followed key by key;
        a MissingVariable sentinel is returned when a key is absent.
        Non-dict values are returned unchanged.
        """
        if isinstance(value, dict):
            split_value = (
                self.value.split(".") if isinstance(self.value, str) else [self.value]
            )
            current_key = split_value[0]
            if current_key not in value:
                return MissingVariable()
            remaining_keys = ".".join(split_value[1:])
            return Variable(remaining_keys).apply(value[current_key])
        else:
            return value


class MissingVariable(Variable):
    """Sentinel Variable returned by Variable.apply when a key is absent.

    Carries value=None, so it serializes to null like an explicit null value.
    """

    def __init__(self) -> None:
        super().__init__(value=None)


class JsonParser:
"""
Input can be a multiple documents, separated by a new-line (\n) characters
So we can't use the builtin JSON parser
"""

def parse(self, original, tokenizer=None, only_parse_initial=False) -> Any:
def parse(
self,
original: str,
tokenizer: Optional[ClauseTokenizer] = None,
only_parse_initial: bool = False,
) -> Any:
if not (original.startswith("{") or original.startswith("[")):
# Doesn't look like JSON - let's return as a variable
return original if original.isnumeric() else Variable(original)
section = None # DICT_KEY | KEY_TO_VALUE | DICT_VAL | OBJECT_END
dict_key = None
section: Optional[str] = None # DICT_KEY | KEY_TO_VALUE | DICT_VAL | OBJECT_END
dict_key = ""
current_phrase = ""
result: Dict[Any, Any] = CaseInsensitiveDict()
result = CaseInsensitiveDict()
tokenizer = tokenizer or ClauseTokenizer(original)
while True:
c = tokenizer.next()
Expand Down Expand Up @@ -160,8 +129,8 @@ def parse(self, original, tokenizer=None, only_parse_initial=False) -> Any:

return result

def _parse_list(self, original, tokenizer) -> Any:
result: List[Union[Any, Dict]] = list()
def _parse_list(self, original: str, tokenizer: ClauseTokenizer) -> Any:
result: List[Any] = list()
section = None
current_phrase = ""
while True:
Expand Down Expand Up @@ -212,7 +181,7 @@ def _parse_list(self, original, tokenizer) -> Any:


class SelectEncoder(JSONEncoder):
def default(self, o):
def default(self, o: Any) -> Any:
if isinstance(o, Variable) and o.value is None:
return None
if isinstance(o, CaseInsensitiveDict):
Expand Down
Loading

0 comments on commit c473ae9

Please sign in to comment.