diff --git a/ibis/common/parsing.py b/ibis/common/parsing.py deleted file mode 100644 index 64baf1bdadbd..000000000000 --- a/ibis/common/parsing.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -import ast -import re - -import parsy - -_STRING_REGEX = ( - """('[^\n'\\\\]*(?:\\\\.[^\n'\\\\]*)*'|"[^\n"\\\\"]*(?:\\\\.[^\n"\\\\]*)*")""" -) - -SPACES = parsy.regex(r"\s*", re.MULTILINE) - - -def spaceless(parser): - return SPACES.then(parser).skip(SPACES) - - -def spaceless_string(*strings: str): - return spaceless( - parsy.alt(*(parsy.string(string, transform=str.lower) for string in strings)) - ) - - -SINGLE_DIGIT = parsy.decimal_digit -RAW_NUMBER = SINGLE_DIGIT.at_least(1).concat() -PRECISION = SCALE = NUMBER = LENGTH = RAW_NUMBER.map(int) -TEMPORAL_SCALE = SINGLE_DIGIT.map(int) - -LPAREN = spaceless_string("(") -RPAREN = spaceless_string(")") - -LBRACKET = spaceless_string("[") -RBRACKET = spaceless_string("]") - -LANGLE = spaceless_string("<") -RANGLE = spaceless_string(">") - -COMMA = spaceless_string(",") -COLON = spaceless_string(":") -SEMICOLON = spaceless_string(";") - -RAW_STRING = parsy.regex(_STRING_REGEX).map(ast.literal_eval) -FIELD = parsy.regex("[a-zA-Z_][a-zA-Z_0-9]*") diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py index fa942fdc70f2..44124a927cbe 100644 --- a/ibis/expr/datatypes/core.py +++ b/ibis/expr/datatypes/core.py @@ -825,7 +825,7 @@ def resolution(self): @property def _pretty_piece(self) -> str: - return f"({self.unit!r})" + return f"('{self.unit.value}')" @public diff --git a/ibis/expr/datatypes/parse.py b/ibis/expr/datatypes/parse.py index 6b0f0204f01a..fb1266b211f5 100644 --- a/ibis/expr/datatypes/parse.py +++ b/ibis/expr/datatypes/parse.py @@ -1,31 +1,53 @@ from __future__ import annotations +import ast import functools +import re from operator import methodcaller import parsy from public import public import ibis.expr.datatypes.core as dt -from ibis.common.parsing import ( - COLON, - COMMA, - 
FIELD, - LANGLE, - LPAREN, - NUMBER, - PRECISION, - RANGLE, - RAW_NUMBER, - RAW_STRING, - RPAREN, - SCALE, - SEMICOLON, - SINGLE_DIGIT, - spaceless, - spaceless_string, + +_STRING_REGEX = ( + """('[^\n'\\\\]*(?:\\\\.[^\n'\\\\]*)*'|"[^\n"\\\\"]*(?:\\\\.[^\n"\\\\]*)*")""" ) +SPACES = parsy.regex(r"\s*", re.MULTILINE) + + +def spaceless(parser): + return SPACES.then(parser).skip(SPACES) + + +def spaceless_string(*strings: str): + return spaceless( + parsy.alt(*(parsy.string(string, transform=str.lower) for string in strings)) + ) + + +SINGLE_DIGIT = parsy.decimal_digit +RAW_NUMBER = SINGLE_DIGIT.at_least(1).concat() +PRECISION = SCALE = NUMBER = LENGTH = RAW_NUMBER.map(int) +TEMPORAL_SCALE = SINGLE_DIGIT.map(int) + +LPAREN = spaceless_string("(") +RPAREN = spaceless_string(")") + +LBRACKET = spaceless_string("[") +RBRACKET = spaceless_string("]") + +LANGLE = spaceless_string("<") +RANGLE = spaceless_string(">") + +COMMA = spaceless_string(",") +COLON = spaceless_string(":") +SEMICOLON = spaceless_string(";") + +RAW_STRING = parsy.regex(_STRING_REGEX).map(ast.literal_eval) +FIELD = parsy.regex("[a-zA-Z_0-9]+") | parsy.string("") + @public @functools.lru_cache(maxsize=100) @@ -152,7 +174,7 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType: map = ( spaceless_string("map") .then(LANGLE) - .then(parsy.seq(primitive, COMMA.then(ty)).combine(dt.Map)) + .then(parsy.seq(ty, COMMA.then(ty)).combine(dt.Map)) .skip(RANGLE) ) diff --git a/ibis/expr/datatypes/tests/test_parse.py b/ibis/expr/datatypes/tests/test_parse.py index 51f4d9eeb4a6..48db2e835968 100644 --- a/ibis/expr/datatypes/tests/test_parse.py +++ b/ibis/expr/datatypes/tests/test_parse.py @@ -1,9 +1,14 @@ from __future__ import annotations +import string + +import hypothesis as h +import hypothesis.strategies as st import parsy import pytest import ibis.expr.datatypes as dt +import ibis.tests.strategies as its from ibis.common.annotations import ValidationError @@ -101,7 +106,8 @@ def test_parse_struct(): 
name: string, price: decimal(12, 2), discount_perc: decimal(12, 2), - shipdate: string + shipdate: string, + : bool >> >>""" expected = dt.Array( @@ -121,6 +127,7 @@ def test_parse_struct(): ("price", dt.Decimal(12, 2)), ("discount_perc", dt.Decimal(12, 2)), ("shipdate", dt.string), + ("", dt.boolean), ] ) ), @@ -169,9 +176,10 @@ def test_parse_empty_map_failure(): dt.dtype("map<>") -def test_parse_map_does_not_allow_non_primitive_keys(): - with pytest.raises(parsy.ParseError): - dt.dtype("map<array<string>, double>") +def test_parse_map_allow_non_primitive_keys(): + assert dt.dtype("map<array<string>, double>") == dt.Map( + dt.Array(dt.string), dt.double + ) def test_parse_timestamp_with_timezone_single_quote(): @@ -254,3 +262,33 @@ def test_parse_time(): def test_parse_null(): assert dt.parse("null") == dt.null + + +# corresponds to its.all_dtypes() but without: +# - geospatial types, the string representation is different from what the parser expects +# - struct types with arbitrary field names, the generated names are restricted to ASCII letters and digits because the parser rejects special characters + +field_names = st.text( + alphabet=st.characters( + whitelist_characters=string.ascii_letters + string.digits, + whitelist_categories=(), + ) +) + +roundtrippable_dtypes = st.deferred( + lambda: ( + its.primitive_dtypes() + | its.string_like_dtypes() + | its.temporal_dtypes() + | its.interval_dtype() + | its.variadic_dtypes() + | its.struct_dtypes(names=field_names) + | its.array_dtypes(roundtrippable_dtypes) + | its.map_dtypes(roundtrippable_dtypes, roundtrippable_dtypes) + ) +) + + +@h.given(roundtrippable_dtypes) +def test_parse_dtype_roundtrip(dtype): + assert dt.dtype(str(dtype)) == dtype diff --git a/ibis/tests/strategies.py b/ibis/tests/strategies.py index 4f1dfd75047c..cc051a000f80 100644 --- a/ibis/tests/strategies.py +++ b/ibis/tests/strategies.py @@ -149,28 +149,30 @@ def primitive_dtypes(nullable=_nullable): _item_strategy = primitive_dtypes() -def array_dtypes(item_strategy=_item_strategy, nullable=_nullable): - return st.builds(dt.Array, 
value_type=item_strategy, nullable=nullable) +def array_dtypes(value_type=_item_strategy, nullable=_nullable): + return st.builds(dt.Array, value_type=value_type, nullable=nullable) -def map_dtypes( - key_strategy=_item_strategy, value_strategy=_item_strategy, nullable=_nullable -): +def map_dtypes(key_type=_item_strategy, value_type=_item_strategy, nullable=_nullable): return st.builds( - dt.Map, key_type=key_strategy, value_type=value_strategy, nullable=nullable + dt.Map, key_type=key_type, value_type=value_type, nullable=nullable ) +_any_text = st.text() + + @st.composite def struct_dtypes( draw, - item_strategy=_item_strategy, + types=_item_strategy, + names=_any_text, num_fields=st.integers(min_value=0, max_value=20), # noqa: B008 nullable=_nullable, ): num_fields = draw(num_fields) - names = draw(st.lists(st.text(), min_size=num_fields, max_size=num_fields)) - types = draw(st.lists(item_strategy, min_size=num_fields, max_size=num_fields)) + names = draw(st.lists(names, min_size=num_fields, max_size=num_fields)) + types = draw(st.lists(types, min_size=num_fields, max_size=num_fields)) fields = dict(zip(names, types)) return dt.Struct(fields, nullable=draw(nullable))