From d4d54824f932a24e625e7b8439a0a17e7e5e4096 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 10 Jul 2024 21:05:16 +0000 Subject: [PATCH 01/17] maybe export geospatial/geoarrow type --- ibis/formats/pyarrow.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 94610a533e14..9f5c59ddbb44 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -3,6 +3,8 @@ import functools from typing import TYPE_CHECKING, Any +from geoarrow import types as geoarrow_types + import ibis.expr.datatypes as dt from ibis.expr.schema import Schema from ibis.formats import DataMapper, SchemaMapper, TableProxy, TypeMapper @@ -14,6 +16,12 @@ import pyarrow as pa +# Probably a better place for this +from geoarrow.types.type_pyarrow import register_extension_types + +register_extension_types() + + @functools.cache def _from_pyarrow_types(): import pyarrow as pa @@ -175,7 +183,15 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType: ) return pa.map_(key_field, value_field, keys_sorted=False) elif dtype.is_geospatial(): - return pa.binary() + if dtype.srid is None: + crs = None + elif dtype.srid == 4326: + crs = geoarrow_types.OGC_CRS84 + else: + # Warn for dropped CRS? + crs = None + + return geoarrow_types.wkb(crs=crs).to_pyarrow() else: try: return _to_pyarrow_types()[type(dtype)] From 445af69473f55c7d6ff27186fe5f186045b2b943 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 10 Jul 2024 21:26:32 +0000 Subject: [PATCH 02/17] maybe roundtrip types --- ibis/formats/pyarrow.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 9f5c59ddbb44..ad2badbcf380 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -4,6 +4,7 @@ from typing import TYPE_CHECKING, Any from geoarrow import types as geoarrow_types +from geoarrow.types.type_pyarrow import GeometryExtensionType import ibis.expr.datatypes as dt from ibis.expr.schema import Schema @@ -121,6 +122,28 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: return dt.Map(key_dtype, value_dtype, nullable=nullable) elif pa.types.is_dictionary(typ): return cls.to_ibis(typ.value_type) + elif isinstance(typ.value_type, GeometryExtensionType): + auth_code = None + if typ.value_type.crs is not None: + crs_dict = typ.value_type.crs.to_json_dict() + if "id" in crs_dict: + crs_id = crs_dict["id"] + if "authority" in crs_id and "code" in crs_id: + auth_code = f"{crs_id["authority"]}:{crs_id["code"]}" + + if auth_code is None: + srid = None + elif auth_code == "OGC:CRS84": + srid = 4326 + else: + srid = crs_id["code"] + + if typ.value_type.edge_type == geoarrow_types.EdgeType.SPHERICAL: + geotype = "geography" + else: + geotype = "geometry" + + return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) else: return _from_pyarrow_types()[typ](nullable=nullable) @@ -183,15 +206,23 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType: ) return pa.map_(key_field, value_field, keys_sorted=False) elif dtype.is_geospatial(): + # Resolve CRS if dtype.srid is None: crs = None elif dtype.srid == 4326: crs = geoarrow_types.OGC_CRS84 else: - # Warn for dropped CRS? + # Warn for dropped CRS? Or geoarrow.types would need a lookup table + # for srid -> PROJJSON crs = None - return geoarrow_types.wkb(crs=crs).to_pyarrow() + # Resolve edge type + if dtype.geotype == "geography": + edge_type = geoarrow_types.EdgeType.SPHERICAL + else: + edge_type = geoarrow_types.EdgeType.PLANAR + + return geoarrow_types.wkb(crs=crs, edge_type=edge_type).to_pyarrow() else: try: return _to_pyarrow_types()[type(dtype)] From cda1d4bb17fbce7bd6d1d60add93dded4d453a8f Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Wed, 10 Jul 2024 21:38:16 +0000 Subject: [PATCH 03/17] add some todos --- ibis/formats/pyarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index ad2badbcf380..99ed5be5c8cc 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -17,7 +17,7 @@ import pyarrow as pa -# Probably a better place for this +# TODO: Probably a better place for this from geoarrow.types.type_pyarrow import register_extension_types register_extension_types() From 13c343c208bfe9a3bf230b4be780cbb4f6401498 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:01:38 -0700 Subject: [PATCH 04/17] chore(deps): add geoarrow-types as an optional dependency --- pyproject.toml | 3 ++- requirements-dev.txt | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f190c4e95b37..3971126edb1c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -64,6 +64,7 @@ db-dtypes = { version = ">=0.3,<2", optional = true } deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<2", optional = true } geopandas = { version = ">=0.6,<2", optional = true } +geoarrow-types = { version = ">=0.2,<1", optional = true } google-cloud-bigquery = { version = ">=3,<4", optional = true } google-cloud-bigquery-storage = { version = ">=2,<3", optional = true } graphviz = { version = ">=0.16,<1", optional = true } @@ -173,7 +174,7 @@ visualization = ["graphviz"] decompiler = ["black"] deltalake = ["deltalake"] examples = ["pins", "fsspec"] -geospatial = ["geopandas", "shapely"] +geospatial = ["geoarrow-types", "geopandas", "shapely"] [tool.poetry.plugins."ibis.backends"] bigquery = "ibis.backends.bigquery" diff --git a/requirements-dev.txt b/requirements-dev.txt index 8dcc0da9c755..1134db74adb4 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -63,6 +63,7 @@ fqdn==1.5.1 ; python_version >= "3.10" and python_version < "3.13" frozenlist==1.4.1 ; python_version >= "3.10" and python_version < "4.0" fsspec==2024.6.1 ; python_version >= "3.10" and python_version < "4.0" gcsfs==2024.6.1 ; python_version >= "3.10" and python_version < "4.0" +geoarrow-types==0.2.0 ; python_version >= "3.10" and python_version < "4.0" geopandas==1.0.1 ; python_version >= "3.10" and python_version < "4.0" google-api-core==2.19.1 ; python_version >= "3.10" and python_version < "4.0" google-api-core[grpc]==2.19.1 ; python_version >= "3.10" and python_version < "4.0" From 1e1ec05764ef68da43e88a3d1c367499194f2e07 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:06:54 -0700 Subject: [PATCH 05/17] chore: fix lint --- ibis/formats/pyarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 99ed5be5c8cc..c6893e4df5fe 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -129,7 +129,7 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: if "id" in crs_dict: crs_id = crs_dict["id"] if "authority" in crs_id and "code" in crs_id: - auth_code = f"{crs_id["authority"]}:{crs_id["code"]}" + auth_code = f"{crs_id['authority']}:{crs_id['code']}" if auth_code is None: srid = None From 63e46e9a82be1658fadf697bb686058bca3d8114 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Fri, 12 Jul 2024 08:12:32 -0700 Subject: [PATCH 06/17] chore: move imports around --- ibis/formats/pyarrow.py | 68 ++++++++++++++++++++--------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index c6893e4df5fe..8bc3873cfc19 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -3,9 +3,6 @@ import functools from typing import TYPE_CHECKING, Any -from geoarrow import types as geoarrow_types -from geoarrow.types.type_pyarrow import GeometryExtensionType - import ibis.expr.datatypes as dt from ibis.expr.schema import Schema from ibis.formats import DataMapper, SchemaMapper, TableProxy, TypeMapper @@ -17,11 +14,6 @@ import pyarrow as pa -# TODO: Probably a better place for this -from geoarrow.types.type_pyarrow import register_extension_types - -register_extension_types() - @functools.cache def _from_pyarrow_types(): @@ -122,29 +114,35 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: return dt.Map(key_dtype, value_dtype, nullable=nullable) elif pa.types.is_dictionary(typ): return cls.to_ibis(typ.value_type) - elif isinstance(typ.value_type, GeometryExtensionType): - auth_code = None - if typ.value_type.crs is not None: - crs_dict = typ.value_type.crs.to_json_dict() - if "id" in crs_dict: - crs_id = crs_dict["id"] - if "authority" in crs_id and "code" in crs_id: - auth_code = f"{crs_id['authority']}:{crs_id['code']}" - - if auth_code is None: - srid = None - elif auth_code == "OGC:CRS84": - srid = 4326 - else: - srid = crs_id["code"] - - if typ.value_type.edge_type == geoarrow_types.EdgeType.SPHERICAL: - geotype = "geography" - else: - geotype = "geometry" - - return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) else: + from geoarrow import types as gat + + gat.type_pyarrow.register_extension_types() + + if isinstance( + value_type := typ.value_type, gat.type_pyarrow.GeometryExtensionType + ): + auth_code = None + if value_type.crs is not None: + crs_dict = value_type.crs.to_json_dict() + if "id" in crs_dict: + crs_id = crs_dict["id"] + if "authority" in crs_id and "code" in crs_id: + auth_code = f"{crs_id['authority']}:{crs_id['code']}" + + if auth_code is None: + srid = None + elif auth_code == "OGC:CRS84": + srid = 4326 + else: + srid = crs_id["code"] + + if value_type.edge_type == gat.EdgeType.SPHERICAL: + geotype = "geography" + else: + geotype = "geometry" + + return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) return _from_pyarrow_types()[typ](nullable=nullable) @classmethod @@ -206,11 +204,13 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType: ) return pa.map_(key_field, value_field, keys_sorted=False) elif dtype.is_geospatial(): + from geoarrow import types as gat + # Resolve CRS if dtype.srid is None: crs = None elif dtype.srid == 4326: - crs = geoarrow_types.OGC_CRS84 + crs = gat.OGC_CRS84 else: # Warn for dropped CRS? Or geoarrow.types would need a lookup table # for srid -> PROJJSON @@ -218,11 +218,11 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType: # Resolve edge type if dtype.geotype == "geography": - edge_type = geoarrow_types.EdgeType.SPHERICAL + edge_type = gat.EdgeType.SPHERICAL else: - edge_type = geoarrow_types.EdgeType.PLANAR + edge_type = gat.EdgeType.PLANAR - return geoarrow_types.wkb(crs=crs, edge_type=edge_type).to_pyarrow() + return gat.wkb(crs=crs, edge_type=edge_type).to_pyarrow() else: try: return _to_pyarrow_types()[type(dtype)] From 2f78405f9a478a22a3925fa2ac99cbc292544964 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 12 Jul 2024 20:07:35 +0000 Subject: [PATCH 07/17] maybe fix import --- ibis/formats/pyarrow.py | 52 ++++++++++++++++++++--------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 8bc3873cfc19..7d423b6741b1 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -14,7 +14,6 @@ import pyarrow as pa - @functools.cache def _from_pyarrow_types(): import pyarrow as pa @@ -114,35 +113,36 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: return dt.Map(key_dtype, value_dtype, nullable=nullable) elif pa.types.is_dictionary(typ): return cls.to_ibis(typ.value_type) - else: + elif ( + isinstance(value_type := typ.value_type, pa.ExtensionType) + and type(value_type).__name__ == "GeometryExtensionType" + ): from geoarrow import types as gat gat.type_pyarrow.register_extension_types() - if isinstance( - value_type := typ.value_type, gat.type_pyarrow.GeometryExtensionType - ): - auth_code = None - if value_type.crs is not None: - crs_dict = value_type.crs.to_json_dict() - if "id" in crs_dict: - crs_id = crs_dict["id"] - if "authority" in crs_id and "code" in crs_id: - auth_code = f"{crs_id['authority']}:{crs_id['code']}" - - if auth_code is None: - srid = None - elif auth_code == "OGC:CRS84": - srid = 4326 - else: - srid = crs_id["code"] - - if value_type.edge_type == gat.EdgeType.SPHERICAL: - geotype = "geography" - else: - geotype = "geometry" - - return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) + auth_code = None + if value_type.crs is not None: + crs_dict = value_type.crs.to_json_dict() + if "id" in crs_dict: + crs_id = crs_dict["id"] + if "authority" in crs_id and "code" in crs_id: + auth_code = f"{crs_id['authority']}:{crs_id['code']}" + + if auth_code is None: + srid = None + elif auth_code == "OGC:CRS84": + srid = 4326 + else: + srid = crs_id["code"] + + if value_type.edge_type == gat.EdgeType.SPHERICAL: + geotype = "geography" + else: + geotype = "geometry" + + return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) + else: return _from_pyarrow_types()[typ](nullable=nullable) @classmethod From 508e452c4875fa83872ed560c94e7927c64f1da6 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Fri, 12 Jul 2024 20:22:54 +0000 Subject: [PATCH 08/17] maybe fix tests --- ibis/formats/pyarrow.py | 10 +++++----- ibis/formats/tests/test_pyarrow.py | 6 ++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 7d423b6741b1..8a5d19f45619 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -114,16 +114,16 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: elif pa.types.is_dictionary(typ): return cls.to_ibis(typ.value_type) elif ( - isinstance(value_type := typ.value_type, pa.ExtensionType) - and type(value_type).__name__ == "GeometryExtensionType" + isinstance(typ, pa.ExtensionType) + and type(typ).__name__ == "GeometryExtensionType" ): from geoarrow import types as gat gat.type_pyarrow.register_extension_types() auth_code = None - if value_type.crs is not None: - crs_dict = value_type.crs.to_json_dict() + if typ.crs is not None: + crs_dict = typ.crs.to_json_dict() if "id" in crs_dict: crs_id = crs_dict["id"] if "authority" in crs_id and "code" in crs_id: @@ -136,7 +136,7 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: else: srid = crs_id["code"] - if value_type.edge_type == gat.EdgeType.SPHERICAL: + if typ.edge_type == gat.EdgeType.SPHERICAL: geotype = "geography" else: geotype = "geometry" diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index 015923af4ca5..6b3b445a0842 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -188,5 +188,7 @@ def test_unknown_dtype_gets_converted_to_string(): pytest.param(dt.multipolygon, id="multipolygon"), ], ) -def test_geo_gets_converted_to_binary(ibis_type): - assert PyArrowType.from_ibis(ibis_type) == pa.binary() +def test_geo_gets_converted_to_geoarrow(ibis_type): + from geoarrow.types.type_pyarrow import GeometryExtensionType + + assert isinstance(PyArrowType.from_ibis(ibis_type), GeometryExtensionType) From 3da0afd48697c374b25eccda99344f1f51745bd7 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 13 Jul 2024 05:46:28 +0000 Subject: [PATCH 09/17] maybe fix skip --- ibis/formats/tests/test_pyarrow.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index 6b3b445a0842..2b7697fba80f 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -189,6 +189,6 @@ def test_unknown_dtype_gets_converted_to_string(): ], ) def test_geo_gets_converted_to_geoarrow(ibis_type): - from geoarrow.types.type_pyarrow import GeometryExtensionType + type_pyarrow = pytest.importorskip("geoarrow.types.type_pyarrow") - assert isinstance(PyArrowType.from_ibis(ibis_type), GeometryExtensionType) + assert isinstance(PyArrowType.from_ibis(ibis_type), type_pyarrow.GeometryExtensionType) From 9f379dfc4729218cc97dc7b299c9b45d06df6b19 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sat, 13 Jul 2024 05:47:09 +0000 Subject: [PATCH 10/17] format --- ibis/formats/tests/test_pyarrow.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index 2b7697fba80f..4b7d6ea2f65d 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -191,4 +191,6 @@ def test_unknown_dtype_gets_converted_to_string(): def test_geo_gets_converted_to_geoarrow(ibis_type): type_pyarrow = pytest.importorskip("geoarrow.types.type_pyarrow") - assert isinstance(PyArrowType.from_ibis(ibis_type), type_pyarrow.GeometryExtensionType) + assert isinstance( + PyArrowType.from_ibis(ibis_type), type_pyarrow.GeometryExtensionType + ) From 9d2fb0809830d0aa9f89ce31989b766a8c7b6461 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 14 Jul 2024 02:20:49 +0000 Subject: [PATCH 11/17] tests --- ibis/formats/pyarrow.py | 29 ++++++++++++++++++---- ibis/formats/tests/test_pyarrow.py | 40 ++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 5 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 8a5d19f45619..94ed4c90eba4 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -115,7 +115,7 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: return cls.to_ibis(typ.value_type) elif ( isinstance(typ, pa.ExtensionType) - and type(typ).__name__ == "GeometryExtensionType" + and type(typ).__module__ == "geoarrow.types.type_pyarrow" ): from geoarrow import types as gat @@ -127,21 +127,40 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: if "id" in crs_dict: crs_id = crs_dict["id"] if "authority" in crs_id and "code" in crs_id: - auth_code = f"{crs_id['authority']}:{crs_id['code']}" + auth_code = (crs_id["authority"], crs_id["code"]) + + if typ.crs is not None and auth_code is None: + # It is possible to have PROJJSON that does not have an authority/code + # attached, either because the producer didn't have that information + # (e.g., because they were reading a older shapefile). In this case, + # pyproj can often guess the authority/code. + import pyproj + + auth_code = pyproj.CRS(typ.crs.to_json()).to_authority() + if auth_code is None: + raise ValueError(f"Can't resolve SRID of crs {typ.crs}") if auth_code is None: srid = None - elif auth_code == "OGC:CRS84": + elif auth_code == ("OGC", "CRS84"): + # OGC:CRS84 and EPSG:4326 are identical except for the order of + # coordinates (i.e., lon lat vs. lat lon) in their official definition. + # This axis ordering is ignored in all but the most obscure scenarios + # such that these are identical. OGC:CRS84 is more correct, but EPSG:4326 + # is more common. srid = 4326 else: - srid = crs_id["code"] + # This works because the two most common srid authorities are EPSG and ESRI + # and the "codes" are all integers and don't intersect with eachother on + # purpose. This won't scale to something like OGC:CRS27 (not common). + srid = int(auth_code[1]) if typ.edge_type == gat.EdgeType.SPHERICAL: geotype = "geography" else: geotype = "geometry" - return dt.GeoSpatial(typ.value_field.nullable, geotype, srid) + return dt.GeoSpatial(geotype, srid, nullable) else: return _from_pyarrow_types()[typ](nullable=nullable) diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index 4b7d6ea2f65d..ad0426dc5c5d 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -194,3 +194,43 @@ def test_geo_gets_converted_to_geoarrow(ibis_type): assert isinstance( PyArrowType.from_ibis(ibis_type), type_pyarrow.GeometryExtensionType ) + +def test_geoarrow_gets_converted_to_geo(): + gat = pytest.importorskip("geoarrow.types") + + import geoarrow.types as gat + + ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow()) + assert ibis_type.is_geospatial() + assert ibis_type.geotype == "geometry" + assert ibis_type.srid is None + assert ibis_type.nullable is True + + ibis_type = PyArrowType.to_ibis(gat.wkb(edge_type=gat.EdgeType.SPHERICAL).to_pyarrow()) + assert ibis_type.geotype == "geography" + + ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow(), nullable=False) + assert ibis_type.nullable is False + +def test_geoarrow_crs_gets_converted_to_geo(): + gat = pytest.importorskip("geoarrow.types") + pyproj = pytest.importorskip("pyproj") + + import geoarrow.types as gat + + # Check the GeoArrow/GeoParquet standard representation of longitude/latitude + ibis_type = PyArrowType.to_ibis(gat.wkb(crs=gat.OGC_CRS84).to_pyarrow()) + assert ibis_type.srid == 4326 + + # Check a standard representation of lon/lat that happens to be missing the + # explicit authority/code section of the PROJJSON (i.e., make pyproj guess + # the srid for us) + lonlat_crs = gat.OGC_CRS84.to_json_dict() + del lonlat_crs["id"] + ibis_type = PyArrowType.to_ibis(gat.wkb(crs=lonlat_crs).to_pyarrow()) + assert ibis_type.srid == 4326 + + # Check a non-lon/lat CRS (e.g., UTM Zone 20N) + utm_20n = pyproj.CRS("EPSG:32620") + ibis_type = PyArrowType.to_ibis(gat.wkb(crs=utm_20n).to_pyarrow()) + assert ibis_type.srid == 32620 From 68d0ed258489481451fc7a18f30e550a5e2a5b8c Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 14 Jul 2024 02:21:21 +0000 Subject: [PATCH 12/17] format --- ibis/formats/tests/test_pyarrow.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index ad0426dc5c5d..1c3b3568ecb8 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -195,6 +195,7 @@ def test_geo_gets_converted_to_geoarrow(ibis_type): PyArrowType.from_ibis(ibis_type), type_pyarrow.GeometryExtensionType ) + def test_geoarrow_gets_converted_to_geo(): gat = pytest.importorskip("geoarrow.types") @@ -206,12 +207,15 @@ def test_geoarrow_gets_converted_to_geo(): assert ibis_type.srid is None assert ibis_type.nullable is True - ibis_type = PyArrowType.to_ibis(gat.wkb(edge_type=gat.EdgeType.SPHERICAL).to_pyarrow()) + ibis_type = PyArrowType.to_ibis( + gat.wkb(edge_type=gat.EdgeType.SPHERICAL).to_pyarrow() + ) assert ibis_type.geotype == "geography" ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow(), nullable=False) assert ibis_type.nullable is False + def test_geoarrow_crs_gets_converted_to_geo(): gat = pytest.importorskip("geoarrow.types") pyproj = pytest.importorskip("pyproj") From cfee95bce92782bf99f67fb3f2d10d4eafdb6c68 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 14 Jul 2024 02:22:47 +0000 Subject: [PATCH 13/17] remove non-skip import --- ibis/formats/tests/test_pyarrow.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index 1c3b3568ecb8..c291dce710de 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -199,8 +199,6 @@ def test_geo_gets_converted_to_geoarrow(ibis_type): def test_geoarrow_gets_converted_to_geo(): gat = pytest.importorskip("geoarrow.types") - import geoarrow.types as gat - ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow()) assert ibis_type.is_geospatial() assert ibis_type.geotype == "geometry" @@ -220,8 +218,6 @@ def test_geoarrow_crs_gets_converted_to_geo(): gat = pytest.importorskip("geoarrow.types") pyproj = pytest.importorskip("pyproj") - import geoarrow.types as gat - # Check the GeoArrow/GeoParquet standard representation of longitude/latitude ibis_type = PyArrowType.to_ibis(gat.wkb(crs=gat.OGC_CRS84).to_pyarrow()) assert ibis_type.srid == 4326 From 9ed106e276d39b5fab14711b99e7cbf6d0b02d59 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 14 Jul 2024 02:53:00 +0000 Subject: [PATCH 14/17] check roundtrip --- ibis/formats/pyarrow.py | 9 ++++++--- ibis/formats/tests/test_pyarrow.py | 22 +++++++++++++++------- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index 94ed4c90eba4..c0fb4acf2c30 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -231,9 +231,12 @@ def from_ibis(cls, dtype: dt.DataType) -> pa.DataType: elif dtype.srid == 4326: crs = gat.OGC_CRS84 else: - # Warn for dropped CRS? Or geoarrow.types would need a lookup table - # for srid -> PROJJSON - crs = None + import pyproj + + # Assume that these are EPSG codes. An srid is more accurately a key + # into a backend/connection-specific lookup table; however, most usage + # should work with this assumption. + crs = pyproj.CRS(f"EPSG:{dtype.srid}") # Resolve edge type if dtype.geotype == "geography": diff --git a/ibis/formats/tests/test_pyarrow.py b/ibis/formats/tests/test_pyarrow.py index c291dce710de..00a57480a26e 100644 --- a/ibis/formats/tests/test_pyarrow.py +++ b/ibis/formats/tests/test_pyarrow.py @@ -199,16 +199,18 @@ def test_geo_gets_converted_to_geoarrow(ibis_type): def test_geoarrow_gets_converted_to_geo(): gat = pytest.importorskip("geoarrow.types") - ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow()) + pyarrow_type = gat.wkb().to_pyarrow() + ibis_type = PyArrowType.to_ibis(pyarrow_type) assert ibis_type.is_geospatial() assert ibis_type.geotype == "geometry" assert ibis_type.srid is None assert ibis_type.nullable is True + assert ibis_type.to_pyarrow() == pyarrow_type - ibis_type = PyArrowType.to_ibis( - gat.wkb(edge_type=gat.EdgeType.SPHERICAL).to_pyarrow() - ) + pyarrow_type = gat.wkb(edge_type=gat.EdgeType.SPHERICAL).to_pyarrow() + ibis_type = PyArrowType.to_ibis(pyarrow_type) assert ibis_type.geotype == "geography" + assert ibis_type.to_pyarrow() == pyarrow_type ibis_type = PyArrowType.to_ibis(gat.wkb().to_pyarrow(), nullable=False) assert ibis_type.nullable is False @@ -219,18 +221,24 @@ def test_geoarrow_crs_gets_converted_to_geo(): pyproj = pytest.importorskip("pyproj") # Check the GeoArrow/GeoParquet standard representation of longitude/latitude - ibis_type = PyArrowType.to_ibis(gat.wkb(crs=gat.OGC_CRS84).to_pyarrow()) + pyarrow_type = gat.wkb(crs=gat.OGC_CRS84).to_pyarrow() + ibis_type = PyArrowType.to_ibis(pyarrow_type) assert ibis_type.srid == 4326 + assert ibis_type.to_pyarrow() == pyarrow_type # Check a standard representation of lon/lat that happens to be missing the # explicit authority/code section of the PROJJSON (i.e., make pyproj guess # the srid for us) lonlat_crs = gat.OGC_CRS84.to_json_dict() del lonlat_crs["id"] - ibis_type = PyArrowType.to_ibis(gat.wkb(crs=lonlat_crs).to_pyarrow()) + pyarrow_type = gat.wkb(crs=lonlat_crs).to_pyarrow() + ibis_type = PyArrowType.to_ibis(pyarrow_type) assert ibis_type.srid == 4326 + assert ibis_type.to_pyarrow() == pyarrow_type # Check a non-lon/lat CRS (e.g., UTM Zone 20N) utm_20n = pyproj.CRS("EPSG:32620") - ibis_type = PyArrowType.to_ibis(gat.wkb(crs=utm_20n).to_pyarrow()) + pyarrow_type = gat.wkb(crs=utm_20n).to_pyarrow() + ibis_type = PyArrowType.to_ibis(pyarrow_type) assert ibis_type.srid == 32620 + assert ibis_type.to_pyarrow() == pyarrow_type From 697b42a6714f038393e328ab49cc2425e741baed Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Sun, 14 Jul 2024 12:27:16 +0000 Subject: [PATCH 15/17] spelling --- ibis/formats/pyarrow.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ibis/formats/pyarrow.py b/ibis/formats/pyarrow.py index c0fb4acf2c30..ac7f9ab8b376 100644 --- a/ibis/formats/pyarrow.py +++ b/ibis/formats/pyarrow.py @@ -151,7 +151,7 @@ def to_ibis(cls, typ: pa.DataType, nullable=True) -> dt.DataType: srid = 4326 else: # This works because the two most common srid authorities are EPSG and ESRI - # and the "codes" are all integers and don't intersect with eachother on + # and the "codes" are all integers and don't intersect with each other on # purpose. This won't scale to something like OGC:CRS27 (not common). srid = int(auth_code[1]) From 7d366f89fc5b31e29777edfffdce5f94f97a4f5b Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 14 Jul 2024 06:03:01 -0700 Subject: [PATCH 16/17] chore(deps): add explicit pyproj dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3971126edb1c..505b8bca7cf3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,6 +65,7 @@ deltalake = { version = ">=0.9.0,<1", optional = true } duckdb = { version = ">=0.8.1,<2", optional = true } geopandas = { version = ">=0.6,<2", optional = true } geoarrow-types = { version = ">=0.2,<1", optional = true } +pyproj = { version = ">=3.3.0,<4", optional = true } google-cloud-bigquery = { version = ">=3,<4", optional = true } google-cloud-bigquery-storage = { version = ">=2,<3", optional = true } graphviz = { version = ">=0.16,<1", optional = true } @@ -174,7 +175,7 @@ visualization = ["graphviz"] decompiler = ["black"] deltalake = ["deltalake"] examples = ["pins", "fsspec"] -geospatial = ["geoarrow-types", "geopandas", "shapely"] +geospatial = ["geoarrow-types", "geopandas", "pyproj", "shapely"] [tool.poetry.plugins."ibis.backends"] bigquery = "ibis.backends.bigquery" From 3749b22ff47d7172a870bb9b5243876049488e9a Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Sun, 14 Jul 2024 06:07:32 -0700 Subject: [PATCH 17/17] chore: relock --- poetry.lock | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/poetry.lock b/poetry.lock index 12592b2bd259..9ae5afbf73d9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1887,6 +1887,20 @@ requests = "*" crc = ["crcmod"] gcsfuse = ["fusepy"] +[[package]] +name = "geoarrow-types" +version = "0.2.0" +description = "" +optional = true +python-versions = ">=3.8" +files = [ + {file = "geoarrow_types-0.2.0-py3-none-any.whl", hash = "sha256:b83bd7e4cee92356df1904bc681cd86938ae808778aef8f836d2dce8f234cb7e"}, + {file = "geoarrow_types-0.2.0.tar.gz", hash = "sha256:2dcb3db9c80b2079a7a61c3e74aa46904f1c899136735f1cacc015757707e924"}, +] + +[package.extras] +test = ["pyarrow", "pytest"] + [[package]] name = "geopandas" version = "1.0.1" @@ -7676,7 +7690,7 @@ duckdb = ["duckdb", "pyarrow", "pyarrow-hotfix"] examples = ["fsspec", "pins"] exasol = ["pyarrow", "pyarrow-hotfix", "pyexasol"] flink = ["pyarrow", "pyarrow-hotfix"] -geospatial = ["geopandas", "shapely"] +geospatial = ["geoarrow-types", "geopandas", "pyproj", "shapely"] impala = ["impyla", "pyarrow", "pyarrow-hotfix"] mssql = ["pyarrow", "pyarrow-hotfix", "pyodbc"] mysql = ["pyarrow", "pyarrow-hotfix", "pymysql"] @@ -7694,4 +7708,4 @@ visualization = ["graphviz"] [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "78c776cedc53934095a7266faddc2c32aa5b76a2b6285719df3055e58d9a67a3" +content-hash = "dfb7c483def7e2d0fded85c445aba1a47a725df201fd5429971124b986ff99c9"