diff --git a/ibis/backends/duckdb/__init__.py b/ibis/backends/duckdb/__init__.py index 5219d249e305..b7409a20a285 100644 --- a/ibis/backends/duckdb/__init__.py +++ b/ibis/backends/duckdb/__init__.py @@ -24,6 +24,7 @@ from ibis import util from ibis.backends.base import CanCreateSchema from ibis.backends.base.sql.alchemy import AlchemyCrossSchemaBackend +from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported from ibis.backends.base.sqlglot import C, F from ibis.backends.duckdb.compiler import DuckDBSQLCompiler from ibis.backends.duckdb.datatypes import DuckDBType @@ -1199,7 +1200,30 @@ def fetch_from_cursor( for name, col in zip(table.column_names, table.columns) } ) - return PandasData.convert_table(df, schema) + df = PandasData.convert_table(df, schema) + if not df.empty and geospatial_supported: + return self._to_geodataframe(df, schema) + return df + + # TODO(gforsyth): this may not need to be specialized in the future + @staticmethod + def _to_geodataframe(df, schema): + """Convert `df` to a `GeoDataFrame`. + + The libraries required for geospatial support must be installed; `df` is + returned unchanged when the schema has no geospatial column. 
+ """ + import geopandas as gpd + + geom_col = None + for name, dtype in schema.items(): + if dtype.is_geospatial(): + if not geom_col: + geom_col = name + df[name] = gpd.GeoSeries.from_wkb(df[name]) + if geom_col: + df = gpd.GeoDataFrame(df, geometry=geom_col) + return df def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]: with self.begin() as con: diff --git a/ibis/backends/duckdb/datatypes.py b/ibis/backends/duckdb/datatypes.py index e931867f4b14..68b4e0ec51cf 100644 --- a/ibis/backends/duckdb/datatypes.py +++ b/ibis/backends/duckdb/datatypes.py @@ -7,6 +7,19 @@ from ibis.backends.base.sql.alchemy.datatypes import AlchemyType from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType + +try: + from geoalchemy2 import Geometry + + class Geometry_WKB(Geometry): + as_binary = "ST_AsWKB" + +except ImportError: + + class Geometry_WKB: + ... + + _from_duckdb_types = { psql.BYTEA: dt.Binary, psql.UUID: dt.UUID, @@ -35,6 +48,8 @@ dt.UInt16: ducktypes.USmallInteger, dt.UInt32: ducktypes.UInteger, dt.UInt64: ducktypes.UBigInteger, + # Handle projections with geometry columns + dt.Geometry: Geometry_WKB, } diff --git a/ibis/backends/duckdb/tests/test_register.py b/ibis/backends/duckdb/tests/test_register.py index 67689b23aa6a..0f5bb1cdc17c 100644 --- a/ibis/backends/duckdb/tests/test_register.py +++ b/ibis/backends/duckdb/tests/test_register.py @@ -46,14 +46,22 @@ def test_read_parquet(data_dir): assert t.count().execute() -@pytest.mark.xfail(raises=duckdb.duckdb.CatalogException, reason="ST_AsEWKB") -def test_read_geo_fail(con, data_dir): +@pytest.mark.xfail(raises=NotImplementedError) +def test_read_geo_to_pyarrow(con, data_dir): pytest.importorskip("geopandas") t = con.read_geo(data_dir / "geojson" / "zones.geojson") # can't convert geometry to arrow type yet assert t.head().to_pyarrow() +def test_read_geo_to_geopandas(con, data_dir): + gpd = pytest.importorskip("geopandas") + t = con.read_geo(data_dir / "geojson" / 
"zones.geojson") + # geometry columns should make the pandas result a GeoDataFrame + gdf = t.head().to_pandas() + assert isinstance(gdf, gpd.GeoDataFrame) + + + def test_read_geo(con, data_dir): pytest.importorskip("geopandas") t = con.read_geo(data_dir / "geojson" / "zones.geojson")