Skip to content

Commit

Permalink
refactor(common): remove ibis.common.parse since it is only used by t…
Browse files Browse the repository at this point in the history
…he datatype parser
  • Loading branch information
kszucs authored and cpcloud committed Sep 2, 2023
1 parent 0959758 commit 557414f
Show file tree
Hide file tree
Showing 5 changed files with 94 additions and 76 deletions.
44 changes: 0 additions & 44 deletions ibis/common/parsing.py

This file was deleted.

2 changes: 1 addition & 1 deletion ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -825,7 +825,7 @@ def resolution(self):

@property
def _pretty_piece(self) -> str:
return f"({self.unit!r})"
return f"('{self.unit.value}')"


@public
Expand Down
58 changes: 40 additions & 18 deletions ibis/expr/datatypes/parse.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,53 @@
from __future__ import annotations

import ast
import functools
import re
from operator import methodcaller

import parsy
from public import public

import ibis.expr.datatypes.core as dt
from ibis.common.parsing import (
COLON,
COMMA,
FIELD,
LANGLE,
LPAREN,
NUMBER,
PRECISION,
RANGLE,
RAW_NUMBER,
RAW_STRING,
RPAREN,
SCALE,
SEMICOLON,
SINGLE_DIGIT,
spaceless,
spaceless_string,

# Regex for a quoted string literal: either '...' or "...". The body may contain
# backslash escapes (a backslash followed by any non-newline character) but no
# raw newline and no unescaped closing quote. The whole literal, quotes
# included, is captured as a single group.
_STRING_REGEX = (
"""('[^\n'\\\\]*(?:\\\\.[^\n'\\\\]*)*'|"[^\n"\\\\"]*(?:\\\\.[^\n"\\\\]*)*")"""
)

# Parser that consumes zero or more whitespace characters.
# NOTE(review): re.MULTILINE only changes how ``^`` and ``$`` match, so it
# appears to have no effect on this pattern -- confirm before removing.
SPACES = parsy.regex(r"\s*", re.MULTILINE)


def spaceless(parser):
    """Wrap *parser* so optional whitespace on either side is consumed.

    The whitespace matched by ``SPACES`` before and after is discarded; the
    result of the combined parser is the result of *parser* alone.
    """
    # ``a >> b`` is parsy's operator form of ``a.then(b)`` and ``a << b`` of
    # ``a.skip(b)``; both operators associate left to right.
    return SPACES >> parser << SPACES


def spaceless_string(*strings: str):
    """Build a parser matching any of *strings*, ignoring surrounding spaces.

    Each candidate is matched after lower-casing (``transform=str.lower``), and
    the alternatives are tried in the order given. Whitespace around the match
    is consumed via ``spaceless``.
    """
    alternatives = [parsy.string(text, transform=str.lower) for text in strings]
    return spaceless(parsy.alt(*alternatives))


# --- Numeric tokens ---------------------------------------------------------
# A single decimal digit, returned as a one-character string.
SINGLE_DIGIT = parsy.decimal_digit
# One or more digits joined into a single string (not yet converted to int).
RAW_NUMBER = SINGLE_DIGIT.at_least(1).concat()
# All four names are bound to the *same* parser object: digits converted to int.
# The distinct names only document intent at the call site.
PRECISION = SCALE = NUMBER = LENGTH = RAW_NUMBER.map(int)
# Exactly one digit converted to int.
TEMPORAL_SCALE = SINGLE_DIGIT.map(int)

# --- Punctuation tokens -----------------------------------------------------
# Each token is built with ``spaceless_string``, so whitespace around it is
# consumed along with the token itself.
LPAREN = spaceless_string("(")
RPAREN = spaceless_string(")")

LBRACKET = spaceless_string("[")
RBRACKET = spaceless_string("]")

LANGLE = spaceless_string("<")
RANGLE = spaceless_string(">")

COMMA = spaceless_string(",")
COLON = spaceless_string(":")
SEMICOLON = spaceless_string(";")

# A quoted string literal matching ``_STRING_REGEX``; ``ast.literal_eval`` turns
# the quoted source text into the Python value it denotes.
RAW_STRING = parsy.regex(_STRING_REGEX).map(ast.literal_eval)
# A field name: one or more ASCII letters, digits or underscores. The fallback
# ``parsy.string("")`` matches the empty string, so an empty name is accepted.
FIELD = parsy.regex("[a-zA-Z_0-9]+") | parsy.string("")


@public
@functools.lru_cache(maxsize=100)
Expand Down Expand Up @@ -152,7 +174,7 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType:
map = (
spaceless_string("map")
.then(LANGLE)
.then(parsy.seq(primitive, COMMA.then(ty)).combine(dt.Map))
.then(parsy.seq(ty, COMMA.then(ty)).combine(dt.Map))
.skip(RANGLE)
)

Expand Down
46 changes: 42 additions & 4 deletions ibis/expr/datatypes/tests/test_parse.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from __future__ import annotations

import string

import hypothesis as h
import hypothesis.strategies as st
import parsy
import pytest

import ibis.expr.datatypes as dt
import ibis.tests.strategies as its
from ibis.common.annotations import ValidationError


Expand Down Expand Up @@ -101,7 +106,8 @@ def test_parse_struct():
name: string,
price: decimal(12, 2),
discount_perc: decimal(12, 2),
shipdate: string
shipdate: string,
: bool
>>
>>"""
expected = dt.Array(
Expand All @@ -121,6 +127,7 @@ def test_parse_struct():
("price", dt.Decimal(12, 2)),
("discount_perc", dt.Decimal(12, 2)),
("shipdate", dt.string),
("", dt.boolean),
]
)
),
Expand Down Expand Up @@ -169,9 +176,10 @@ def test_parse_empty_map_failure():
dt.dtype("map<>")


def test_parse_map_does_not_allow_non_primitive_keys():
with pytest.raises(parsy.ParseError):
dt.dtype("map<array<string>, double>")
def test_parse_map_allow_non_primitive_keys():
    """A map whose key type is itself a nested type (an array) must parse."""
    expected = dt.Map(dt.Array(dt.string), dt.double)
    assert dt.dtype("map<array<string>, double>") == expected


def test_parse_timestamp_with_timezone_single_quote():
Expand Down Expand Up @@ -254,3 +262,33 @@ def test_parse_time():

def test_parse_null():
    """The literal type name ``null`` parses to ``dt.null``."""
    parsed = dt.parse("null")
    assert parsed == dt.null


# corresponds to its.all_dtypes() but without:
# - geospatial types, whose string representation differs from what the parser expects
# - structs with arbitrary field names: the default strategy generates names containing
#   special characters, so struct field names are restricted to ASCII letters and digits

# Strategy for struct field names. With an empty ``whitelist_categories`` only
# the explicitly whitelisted characters (ASCII letters and digits) are drawn.
# No ``min_size`` is given, so the empty string can be generated as well.
field_names = st.text(
alphabet=st.characters(
whitelist_characters=string.ascii_letters + string.digits,
whitelist_categories=(),
)
)

# Strategy for data types used by the round-trip test. ``st.deferred`` delays
# evaluation of the lambda, which lets the array and map branches refer back to
# ``roundtrippable_dtypes`` itself and so generate nested types.
roundtrippable_dtypes = st.deferred(
lambda: (
its.primitive_dtypes()
| its.string_like_dtypes()
| its.temporal_dtypes()
| its.interval_dtype()
| its.variadic_dtypes()
| its.struct_dtypes(names=field_names)
| its.array_dtypes(roundtrippable_dtypes)
| its.map_dtypes(roundtrippable_dtypes, roundtrippable_dtypes)
)
)


@h.given(roundtrippable_dtypes)
def test_parse_dtype_roundtrip(dtype):
    """Parsing the string form of a generated dtype yields an equal dtype."""
    rendered = str(dtype)
    assert dt.dtype(rendered) == dtype
20 changes: 11 additions & 9 deletions ibis/tests/strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,28 +149,30 @@ def primitive_dtypes(nullable=_nullable):
_item_strategy = primitive_dtypes()


def array_dtypes(item_strategy=_item_strategy, nullable=_nullable):
return st.builds(dt.Array, value_type=item_strategy, nullable=nullable)
def array_dtypes(value_type=_item_strategy, nullable=_nullable):
    """Return a strategy building ``dt.Array`` instances.

    ``value_type`` and ``nullable`` are forwarded to ``st.builds`` as the
    keyword arguments of the same names.
    """
    array_kwargs = {"value_type": value_type, "nullable": nullable}
    return st.builds(dt.Array, **array_kwargs)


def map_dtypes(
key_strategy=_item_strategy, value_strategy=_item_strategy, nullable=_nullable
):
def map_dtypes(key_type=_item_strategy, value_type=_item_strategy, nullable=_nullable):
return st.builds(
dt.Map, key_type=key_strategy, value_type=value_strategy, nullable=nullable
dt.Map, key_type=key_type, value_type=value_type, nullable=nullable
)


_any_text = st.text()


@st.composite
def struct_dtypes(
draw,
item_strategy=_item_strategy,
types=_item_strategy,
names=_any_text,
num_fields=st.integers(min_value=0, max_value=20), # noqa: B008
nullable=_nullable,
):
num_fields = draw(num_fields)
names = draw(st.lists(st.text(), min_size=num_fields, max_size=num_fields))
types = draw(st.lists(item_strategy, min_size=num_fields, max_size=num_fields))
names = draw(st.lists(names, min_size=num_fields, max_size=num_fields))
types = draw(st.lists(types, min_size=num_fields, max_size=num_fields))
fields = dict(zip(names, types))
return dt.Struct(fields, nullable=draw(nullable))

Expand Down

0 comments on commit 557414f

Please sign in to comment.