Skip to content

Commit

Permalink
feat(duckdb): enforce aswkb for projections, coerce to geopandas
Browse files Browse the repository at this point in the history
Two things are happening in this commit:
1. We intercept the call to `get_sqla_type` to enforce the use of
`ST_AsWKB`, overriding the behavior of `geoalchemy` which is very
PostGIS-focused.
2. We add a DuckDB-specific `_to_geodataframe` since we're getting a WKB
out of DuckDB and can pass this directly to `geopandas` without first
going through Shapely (as we do in the base alchemy method)
  • Loading branch information
gforsyth committed Dec 4, 2023
1 parent cc16715 commit 33327dc
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 3 deletions.
26 changes: 25 additions & 1 deletion ibis/backends/duckdb/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ibis import util
from ibis.backends.base import CanCreateSchema
from ibis.backends.base.sql.alchemy import AlchemyCrossSchemaBackend
from ibis.backends.base.sql.alchemy.geospatial import geospatial_supported
from ibis.backends.base.sqlglot import C, F
from ibis.backends.duckdb.compiler import DuckDBSQLCompiler
from ibis.backends.duckdb.datatypes import DuckDBType
Expand Down Expand Up @@ -1199,7 +1200,30 @@ def fetch_from_cursor(
for name, col in zip(table.column_names, table.columns)
}
)
return PandasData.convert_table(df, schema)
df = PandasData.convert_table(df, schema)
if not df.empty and geospatial_supported:
return self._to_geodataframe(df, schema)
return df

# TODO(gforsyth): this may not need to be specialized in the future
@staticmethod
def _to_geodataframe(df, schema):
    """Convert `df` to a `GeoDataFrame` when `schema` has geospatial columns.

    Every column whose dtype reports ``is_geospatial()`` is decoded from WKB
    with ``GeoSeries.from_wkb``. The first such column, in schema order,
    becomes the geometry column of the returned `GeoDataFrame`.

    The libraries required for geospatial support only need to be installed
    when a geospatial column is actually present.

    Parameters
    ----------
    df
        DataFrame to convert; geospatial columns are replaced in place.
    schema
        Object whose ``items()`` yields ``(name, dtype)`` pairs.

    Returns
    -------
    `df` itself when no column is geospatial, otherwise a `GeoDataFrame`
    built from `df`.
    """
    geo_names = [name for name, dtype in schema.items() if dtype.is_geospatial()]
    if not geo_names:
        # Nothing to decode, so skip the geopandas import entirely.
        return df

    import geopandas as gpd

    for name in geo_names:
        df[name] = gpd.GeoSeries.from_wkb(df[name])

    # Index the list instead of testing a name for truthiness, so that the
    # first geospatial column is always the one used as the geometry.
    return gpd.GeoDataFrame(df, geometry=geo_names[0])

def _metadata(self, query: str) -> Iterator[tuple[str, dt.DataType]]:
with self.begin() as con:
Expand Down
15 changes: 15 additions & 0 deletions ibis/backends/duckdb/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,19 @@
from ibis.backends.base.sql.alchemy.datatypes import AlchemyType
from ibis.backends.base.sqlglot.datatypes import DuckDBType as SqlglotDuckdbType


try:
    from geoalchemy2 import Geometry
except ImportError:

    # geoalchemy2 is optional: keep the name importable with a bare
    # placeholder so module-level type mappings can still reference it.
    class Geometry_WKB:
        pass

else:

    # Geometry type that asks for `ST_AsWKB` as its "as binary" function
    # instead of the geoalchemy2 default.
    class Geometry_WKB(Geometry):
        as_binary = "ST_AsWKB"


_from_duckdb_types = {
psql.BYTEA: dt.Binary,
psql.UUID: dt.UUID,
Expand Down Expand Up @@ -35,6 +48,8 @@
dt.UInt16: ducktypes.USmallInteger,
dt.UInt32: ducktypes.UInteger,
dt.UInt64: ducktypes.UBigInteger,
# Handle projections with geometry columns
dt.Geometry: Geometry_WKB,
}


Expand Down
12 changes: 10 additions & 2 deletions ibis/backends/duckdb/tests/test_register.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,14 +46,22 @@ def test_read_parquet(data_dir):
assert t.count().execute()


@pytest.mark.xfail(raises=duckdb.duckdb.CatalogException, reason="ST_AsEWKB")
def test_read_geo_fail(con, data_dir):
@pytest.mark.xfail(raises=NotImplementedError)
def test_read_geo_to_pyarrow(con, data_dir):
    """Reading a geo file and materializing it with pyarrow is expected to fail."""
    pytest.importorskip("geopandas")
    zones_path = data_dir / "geojson" / "zones.geojson"
    zones = con.read_geo(zones_path)
    # can't convert geometry to arrow type yet
    preview = zones.head().to_pyarrow()
    assert preview


def test_read_geo_to_geopandas(con, data_dir):
    """A geo file read through `read_geo` converts to a `GeoDataFrame` via pandas."""
    geopandas = pytest.importorskip("geopandas")
    zones = con.read_geo(data_dir / "geojson" / "zones.geojson")
    result = zones.head().to_pandas()
    assert isinstance(result, geopandas.GeoDataFrame)


def test_read_geo(con, data_dir):
pytest.importorskip("geopandas")
t = con.read_geo(data_dir / "geojson" / "zones.geojson")
Expand Down

0 comments on commit 33327dc

Please sign in to comment.