From 73add932e60a293f104a1f4835b8491397d62a06 Mon Sep 17 00:00:00 2001
From: Jim Crist-Harif
Date: Mon, 26 Feb 2024 16:28:28 -0600
Subject: [PATCH] feat(api): support converting ibis types & schemas to/from polars types & schemas

---
Notes (this area is ignored by `git am`; it is not part of the commit message):

* Adds `DataType.from_polars` / `DataType.to_polars` and
  `Schema.from_polars` / `Schema.to_polars`. Each one delegates to
  `PolarsType` / `PolarsSchema` from `ibis.formats.polars`, imported inside
  the method body so polars is only needed when these methods are called.
* Registers `from_polars` on the `dtype` dispatcher for
  `polars.datatypes.classes.DataTypeClass`, and `infer_polars_dataframe` on
  the `infer` dispatcher for `polars.DataFrame` and `polars.LazyFrame`. Both
  registrations sit before the `del ....register` lines that lock the
  dispatchers.
* Tests call `pytest.importorskip`, so they are skipped when the optional
  dependency is not installed.
* NOTE(review): the `dtype` registration targets `DataTypeClass` only.
  Presumably parametrized dtype *instances* (e.g. `pl.List(pl.Int64)`) are
  not matched by this registration - confirm whether they should be.
* NOTE(review): `infer_polars_dataframe` reads `df.schema` for LazyFrames as
  well; confirm this is acceptable for the supported polars versions.

 ibis/expr/datatypes/core.py            | 18 ++++++++++++++++
 ibis/expr/datatypes/tests/test_core.py |  8 ++++++-
 ibis/expr/schema.py                    | 22 +++++++++++++++++++
 ibis/expr/tests/test_schema.py         | 30 ++++++++++++++++++++++++++
 4 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/ibis/expr/datatypes/core.py b/ibis/expr/datatypes/core.py
index 8605d5aba7a1..798647edad6b 100644
--- a/ibis/expr/datatypes/core.py
+++ b/ibis/expr/datatypes/core.py
@@ -82,6 +82,11 @@ def from_pyarrow(value, nullable=True):
     return DataType.from_pyarrow(value, nullable)
 
 
+@dtype.register("polars.datatypes.classes.DataTypeClass")
+def from_polars(value, nullable=True):
+    return DataType.from_polars(value, nullable)
+
+
 # lock the dispatcher to prevent new types from being registered
 del dtype.register
 
@@ -237,6 +242,13 @@ def from_pyarrow(cls, arrow_type, nullable=True) -> Self:
 
         return PyArrowType.to_ibis(arrow_type, nullable=nullable)
 
+    @classmethod
+    def from_polars(cls, polars_type, nullable=True) -> Self:
+        """Return the equivalent ibis datatype."""
+        from ibis.formats.polars import PolarsType
+
+        return PolarsType.to_ibis(polars_type, nullable=nullable)
+
     @classmethod
     def from_dask(cls, dask_type, nullable=True) -> Self:
         """Return the equivalent ibis datatype."""
@@ -260,6 +272,12 @@ def to_pyarrow(self):
 
         return PyArrowType.from_ibis(self)
 
+    def to_polars(self):
+        """Return the equivalent polars datatype."""
+        from ibis.formats.polars import PolarsType
+
+        return PolarsType.from_ibis(self)
+
     def to_dask(self):
         """Return the equivalent dask datatype."""
         return self.to_pandas()
diff --git a/ibis/expr/datatypes/tests/test_core.py b/ibis/expr/datatypes/tests/test_core.py
index 10ac23737490..ba6ad18d35c7 100644
--- a/ibis/expr/datatypes/tests/test_core.py
+++ b/ibis/expr/datatypes/tests/test_core.py
@@ -690,6 +690,12 @@ def test_type_coercion():
         param(dt.boolean, id="boolean"),
     ],
 )
-@pytest.mark.parametrize("fmt", ["numpy", "pandas", "pyarrow"])
+@pytest.mark.parametrize("fmt", ["numpy", "pandas", "pyarrow", "polars"])
 def test_type_roundtrip(dtype, fmt):
+    pytest.importorskip(fmt)
     assert getattr(dt.DataType, f"from_{fmt}")(getattr(dtype, f"to_{fmt}")()) == dtype
+
+
+def test_dtype_from_polars():
+    pl = pytest.importorskip("polars")
+    assert dt.dtype(pl.Int64) == dt.int64
diff --git a/ibis/expr/schema.py b/ibis/expr/schema.py
index d7a999507ae1..2be484b0efc1 100644
--- a/ibis/expr/schema.py
+++ b/ibis/expr/schema.py
@@ -156,6 +156,13 @@ def from_pyarrow(cls, pyarrow_schema):
 
         return PyArrowSchema.to_ibis(pyarrow_schema)
 
+    @classmethod
+    def from_polars(cls, polars_schema):
+        """Return the equivalent ibis schema."""
+        from ibis.formats.polars import PolarsSchema
+
+        return PolarsSchema.to_ibis(polars_schema)
+
     @classmethod
     def from_dask(cls, dask_schema):
         """Return the equivalent ibis schema."""
@@ -179,6 +186,12 @@ def to_pyarrow(self):
 
         return PyArrowSchema.from_ibis(self)
 
+    def to_polars(self):
+        """Return the equivalent polars schema."""
+        from ibis.formats.polars import PolarsSchema
+
+        return PolarsSchema.from_ibis(self)
+
     def to_dask(self):
         """Return the equivalent dask dtypes."""
         return self.to_pandas()
@@ -290,6 +303,15 @@ def infer_pyarrow_table(table, schema=None):
     return PyArrowSchema.to_ibis(schema)
 
 
+@infer.register("polars.DataFrame")
+@infer.register("polars.LazyFrame")
+def infer_polars_dataframe(df, schema=None):
+    from ibis.formats.polars import PolarsSchema
+
+    schema = schema if schema is not None else df.schema
+    return PolarsSchema.to_ibis(schema)
+
+
 # lock the dispatchers to avoid adding new implementations
 del infer.register
 del schema.register
diff --git a/ibis/expr/tests/test_schema.py b/ibis/expr/tests/test_schema.py
index 7d926a1ee198..5d56462903c1 100644
--- a/ibis/expr/tests/test_schema.py
+++ b/ibis/expr/tests/test_schema.py
@@ -403,6 +403,36 @@ def test_schema_from_to_pyarrow_schema():
     assert restored_schema == pyarrow_schema
 
 
+@pytest.mark.parametrize("lazy", [False, True])
+def test_schema_infer_polars_dataframe(lazy):
+    pl = pytest.importorskip("polars")
+    df = pl.DataFrame(
+        {"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [True, False, True]},
+        schema={"a": pl.Int64, "b": pl.Utf8, "c": pl.Boolean},
+    )
+    if lazy:
+        df = df.lazy()
+    s = sch.infer(df)
+    assert s == sch.Schema({"a": dt.int64, "b": dt.string, "c": dt.boolean})
+
+
+def test_schema_from_to_polars_schema():
+    pl = pytest.importorskip("polars")
+
+    polars_schema = {
+        "a": pl.Int64,
+        "b": pl.Utf8,
+        "c": pl.Boolean,
+    }
+    ibis_schema = sch.Schema({"a": dt.int64, "b": dt.string, "c": dt.boolean})
+
+    res = sch.Schema.from_polars(polars_schema)
+    assert res == ibis_schema
+
+    res = ibis_schema.to_polars()
+    assert res == polars_schema
+
+
 def test_schema_from_to_numpy_dtypes():
     numpy_dtypes = [
         ("a", np.dtype("int64")),