From 2d562b7349add2e50c618262e62d8112832e8769 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Mon, 22 Jan 2024 09:37:42 -0500
Subject: [PATCH] feat(examples): add `zones` geojson example (#8040)

Adds a `zones` geospatial data example

Closes #7958.
---
Review note: relative to commit 2d562b7349ad, `Zones.fetch` now forwards
`table_name` to `read_geo`, `add_zones_geojson` checks for the file inside
`data_path` (not the current working directory), and docstrings were added.
The `index` blob ids below come from the original commit and do not reflect
these amendments.

 ibis/backends/tests/test_examples.py | 36 ++++++++++++++++++++++++++++++++++++
 ibis/examples/__init__.py            | 44 ++++++++++++++++++++++++++++++++++++++++++++
 ibis/examples/gen_registry.py        | 16 ++++++++++++++++
 3 files changed, 96 insertions(+)

diff --git a/ibis/backends/tests/test_examples.py b/ibis/backends/tests/test_examples.py
index 235ea4804703..f46d7ed6df19 100644
--- a/ibis/backends/tests/test_examples.py
+++ b/ibis/backends/tests/test_examples.py
@@ -49,3 +49,39 @@ def test_load_examples(con, example, columns):
     t = getattr(ibis.examples, example).fetch(backend=con)
     assert t.columns == columns
     assert t.count().execute() > 0
+
+
+@pytest.mark.skipif(
+    (LINUX or MACOS) and SANDBOXED,
+    reason="nix on linux cannot download duckdb extensions or data due to sandboxing",
+)
+@pytest.mark.notimpl(
+    [
+        # everything except duckdb
+        "bigquery",
+        "clickhouse",
+        "dask",
+        "datafusion",
+        "druid",
+        "exasol",
+        "flink",
+        "impala",
+        "mssql",
+        "mysql",
+        "oracle",
+        "pandas",
+        "polars",
+        "postgres",
+        "pyspark",
+        "snowflake",
+        "sqlite",
+        "trino",
+    ]
+)
+def test_load_geo_example(con):
+    pytest.importorskip("geopandas")
+    pytest.importorskip("shapely")
+    pytest.importorskip("geoalchemy2")
+
+    t = ibis.examples.zones.fetch(backend=con)
+    assert t.geom.type().is_geospatial()
diff --git a/ibis/examples/__init__.py b/ibis/examples/__init__.py
index eb23d7d73580..fd7d8257eb1c 100644
--- a/ibis/examples/__init__.py
+++ b/ibis/examples/__init__.py
@@ -126,6 +126,50 @@ def __dir__() -> list[str]:
     return sorted(_get_metadata().keys())
 
 
+class Zones(Concrete):
+    """Geospatial example loaded through a backend's `read_geo` method."""
+
+    name: str
+    help: Optional[str]
+
+    def fetch(
+        self,
+        *,
+        table_name: str | None = None,
+        backend: BaseBackend | None = None,
+    ) -> ir.Table:
+        """Download the pinned file for this example and load it as a table.
+
+        Parameters
+        ----------
+        table_name
+            Name for the resulting table; defaults to the example's name.
+        backend
+            Backend to load into; defaults to `ibis.get_backend()`.
+
+        Returns
+        -------
+        ir.Table
+            Table expression returned by the backend's `read_geo`.
+        """
+        if backend is None:
+            backend = ibis.get_backend()
+
+        name = self.name
+
+        if table_name is None:
+            table_name = name
+
+        board = _get_board()
+
+        (path,) = board.pin_download(name)
+        # Forward the name so a caller-supplied `table_name` is honoured.
+        return backend.read_geo(path, table_name=table_name)
+
+
+zones = Zones("zones", help="Taxi zones in New York City (EPSG:2263)")
+
+
 def __getattr__(name: str) -> Example:
     try:
         meta = _get_metadata()
diff --git a/ibis/examples/gen_registry.py b/ibis/examples/gen_registry.py
index eccf7aab856b..00be9fa8d81d 100755
--- a/ibis/examples/gen_registry.py
+++ b/ibis/examples/gen_registry.py
@@ -11,6 +11,7 @@
 from collections import Counter
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
+from urllib.request import urlretrieve
 
 import pins
 import requests
@@ -107,6 +108,19 @@ def add_movielens_example(
         con.read_csv(csv_path).to_parquet(parquet_path, codec="zstd")
 
 
+def add_zones_geojson(data_path: Path) -> None:
+    """Download the `zones` GeoJSON file into `data_path` if it is missing."""
+    file_name = "zones.geojson"
+    url = "https://raw.githubusercontent.com/ibis-project/testing-data/master/geojson/zones.geojson"
+
+    # Check the destination itself (not the current working directory) so a
+    # previously downloaded copy is detected and not fetched again.
+    file_path = data_path / file_name
+
+    if not file_path.exists():
+        urlretrieve(url, file_path)
+
+
 def add_imdb_example(data_path: Path) -> None:
     def convert_to_parquet(
         base: Path,
@@ -229,6 +243,8 @@ def main(parser):
 
     add_wowah_example(data_path, client=storage.Client(), metadata=metadata)
 
+    add_zones_geojson(data_path)
+
     # generate data from R
     subprocess.check_call(["Rscript", str(EXAMPLES_DIRECTORY / "gen_examples.R")])
 