From 1c91d6590f3ec2e358f6d77d6bcf9b9f05d889f9 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Thu, 19 Sep 2024 10:47:20 -0400 Subject: [PATCH] fix(datatype-parsing): ensure that geospatial types are round trippable through the data type parser (#10171) Fixes #10170. --- ibis/expr/datatypes/parse.py | 27 ++++++++++++++++++------- ibis/expr/datatypes/tests/test_parse.py | 2 +- 2 files changed, 21 insertions(+), 8 deletions(-) diff --git a/ibis/expr/datatypes/parse.py b/ibis/expr/datatypes/parse.py index 22ec355333bb..dc709d64e31d 100644 --- a/ibis/expr/datatypes/parse.py +++ b/ibis/expr/datatypes/parse.py @@ -1,8 +1,8 @@ from __future__ import annotations import ast -import functools import re +from functools import lru_cache, partial from operator import methodcaller import parsy @@ -50,7 +50,7 @@ def spaceless_string(*strings: str): @public -@functools.lru_cache(maxsize=100) +@lru_cache(maxsize=100) def parse( text: str, default_decimal_parameters: tuple[int | None, int | None] = (None, None) ) -> dt.DataType: @@ -88,12 +88,15 @@ def parse( geotype = spaceless_string("geography", "geometry") srid_geotype = SEMICOLON.then(parsy.seq(srid=NUMBER.skip(COLON), geotype=geotype)) + geotype_srid = COLON.then(parsy.seq(geotype=geotype, srid=SEMICOLON.then(NUMBER))) geotype_part = COLON.then(parsy.seq(geotype=geotype)) srid_part = SEMICOLON.then(parsy.seq(srid=NUMBER)) def geotype_parser(typ: type[dt.DataType]) -> dt.DataType: return spaceless_string(typ.__name__.lower()).then( - (srid_geotype | geotype_part | srid_part).optional(dict()).combine_dict(typ) + (srid_geotype | geotype_srid | geotype_part | srid_part) + .optional(dict()) + .combine_dict(typ) ) primitive = ( @@ -116,15 +119,25 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType: "time", "date", "null", - ).map(functools.partial(getattr, dt)) - | spaceless_string("bytes").result(dt.binary) - | geotype.map(dt.GeoSpatial) + ).map(partial(getattr, dt)) | geotype_parser(dt.LineString) | geotype_parser(dt.Polygon) | geotype_parser(dt.Point) | geotype_parser(dt.MultiLineString) | geotype_parser(dt.MultiPolygon) | geotype_parser(dt.MultiPoint) + | spaceless_string("bytes").result(dt.binary) + | spaceless_string("geospatial:geography").then( + srid_part.optional(dict()).combine_dict( + partial(dt.GeoSpatial, geotype="geography") + ) + ) + | spaceless_string("geospatial:geometry").then( + srid_part.optional(dict()).combine_dict( + partial(dt.GeoSpatial, geotype="geometry") + ) + ) + | geotype.map(dt.GeoSpatial) ) varchar_or_char = ( @@ -201,7 +214,7 @@ def geotype_parser(typ: type[dt.DataType]) -> dt.DataType: | map | struct | spaceless_string("jsonb", "json", "uuid", "macaddr", "inet").map( - functools.partial(getattr, dt) + partial(getattr, dt) ) | spaceless_string("int").result(dt.int64) | spaceless_string("str").result(dt.string) diff --git a/ibis/expr/datatypes/tests/test_parse.py b/ibis/expr/datatypes/tests/test_parse.py index 7666ae95cfb1..b020f96d4ca3 100644 --- a/ibis/expr/datatypes/tests/test_parse.py +++ b/ibis/expr/datatypes/tests/test_parse.py @@ -266,7 +266,6 @@ def test_parse_null(): # corresponds to its.all_dtypes() but without: -# - geospacial types, the string representation is different from what the parser expects # - struct types, the generated struct field names contain special characters field_names = st.text( @@ -286,6 +285,7 @@ def test_parse_null(): | its.struct_dtypes(names=field_names) | its.array_dtypes(roundtrippable_dtypes) | its.map_dtypes(roundtrippable_dtypes, roundtrippable_dtypes) + | its.geospatial_dtypes() ) )