Skip to content

Commit

Permalink
feat(api): support converting ibis types & schemas to/from polars types & schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Feb 27, 2024
1 parent 40ada17 commit 73add93
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 1 deletion.
18 changes: 18 additions & 0 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,11 @@ def from_pyarrow(value, nullable=True):
return DataType.from_pyarrow(value, nullable)


@dtype.register("polars.datatypes.classes.DataTypeClass")
def from_polars(value, nullable=True):
    """Build an ibis datatype from a polars datatype class.

    Registered on the ``dtype`` dispatcher; the work is delegated to
    ``DataType.from_polars`` with ``nullable`` passed through unchanged.
    """
    return DataType.from_polars(value, nullable=nullable)


# Lock the dispatcher to prevent new types from being registered: every
# registration above has already run, so the `register` attribute they used
# is deleted here.
del dtype.register

Expand Down Expand Up @@ -237,6 +242,13 @@ def from_pyarrow(cls, arrow_type, nullable=True) -> Self:

return PyArrowType.to_ibis(arrow_type, nullable=nullable)

@classmethod
def from_polars(cls, polars_type, nullable=True) -> Self:
    """Convert a polars datatype into the matching ibis datatype.

    The conversion is performed by ``PolarsType.to_ibis``; ``nullable`` is
    forwarded to it as a keyword argument.
    """
    # The converter is imported at call time, not at module import time.
    from ibis.formats.polars import PolarsType as converter

    ibis_type = converter.to_ibis(polars_type, nullable=nullable)
    return ibis_type

@classmethod
def from_dask(cls, dask_type, nullable=True) -> Self:
"""Return the equivalent ibis datatype."""
Expand All @@ -260,6 +272,12 @@ def to_pyarrow(self):

return PyArrowType.from_ibis(self)

def to_polars(self):
    """Convert this ibis datatype into the matching polars datatype.

    The conversion is performed by ``PolarsType.from_ibis``.
    """
    # The converter is imported at call time, not at module import time.
    from ibis.formats.polars import PolarsType as converter

    polars_type = converter.from_ibis(self)
    return polars_type

def to_dask(self):
"""Return the equivalent dask datatype."""
return self.to_pandas()
Expand Down
8 changes: 7 additions & 1 deletion ibis/expr/datatypes/tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -690,6 +690,12 @@ def test_type_coercion():
param(dt.boolean, id="boolean"),
],
)
@pytest.mark.parametrize("fmt", ["numpy", "pandas", "pyarrow", "polars"])
def test_type_roundtrip(dtype, fmt):
    """Check that ``dtype`` survives a conversion to ``fmt`` and back.

    ``fmt`` names a format for which ``DataType`` provides a ``to_<fmt>``
    method and a ``from_<fmt>`` constructor. The test is skipped when the
    corresponding package cannot be imported.

    Only one ``fmt`` parametrization is kept: the superseded three-format
    decorator was removed because pytest rejects a test that parametrizes
    the same argument name twice.
    """
    pytest.importorskip(fmt)
    from_fmt = getattr(dt.DataType, f"from_{fmt}")
    to_fmt = getattr(dtype, f"to_{fmt}")
    assert from_fmt(to_fmt()) == dtype


def test_dtype_from_polars():
    """``dt.dtype`` maps the polars ``Int64`` class to ``dt.int64``."""
    pl = pytest.importorskip("polars")
    converted = dt.dtype(pl.Int64)
    assert converted == dt.int64
22 changes: 22 additions & 0 deletions ibis/expr/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,13 @@ def from_pyarrow(cls, pyarrow_schema):

return PyArrowSchema.to_ibis(pyarrow_schema)

@classmethod
def from_polars(cls, polars_schema):
    """Convert a polars schema into the matching ibis schema.

    The conversion is performed by ``PolarsSchema.to_ibis``.
    """
    # The converter is imported at call time, not at module import time.
    from ibis.formats.polars import PolarsSchema as converter

    ibis_schema = converter.to_ibis(polars_schema)
    return ibis_schema

@classmethod
def from_dask(cls, dask_schema):
"""Return the equivalent ibis schema."""
Expand All @@ -179,6 +186,12 @@ def to_pyarrow(self):

return PyArrowSchema.from_ibis(self)

def to_polars(self):
    """Convert this ibis schema into the matching polars schema.

    The conversion is performed by ``PolarsSchema.from_ibis``.
    """
    # The converter is imported at call time, not at module import time.
    from ibis.formats.polars import PolarsSchema as converter

    polars_schema = converter.from_ibis(self)
    return polars_schema

def to_dask(self):
"""Return the equivalent dask dtypes."""
return self.to_pandas()
Expand Down Expand Up @@ -290,6 +303,15 @@ def infer_pyarrow_table(table, schema=None):
return PyArrowSchema.to_ibis(schema)


@infer.register("polars.DataFrame")
@infer.register("polars.LazyFrame")
def infer_polars_dataframe(df, schema=None):
    """Infer an ibis schema from a polars ``DataFrame`` or ``LazyFrame``.

    If ``schema`` is supplied it is converted as-is; otherwise the frame's
    own ``df.schema`` is used. Conversion is done by ``PolarsSchema.to_ibis``.
    """
    from ibis.formats.polars import PolarsSchema

    # Fall back to the frame's schema only when the caller gave none.
    if schema is None:
        schema = df.schema
    return PolarsSchema.to_ibis(schema)


# Lock the dispatchers to avoid adding new implementations: the `register`
# attribute is deleted from both `infer` and `schema` once the registrations
# above have run.
del infer.register
del schema.register
30 changes: 30 additions & 0 deletions ibis/expr/tests/test_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,36 @@ def test_schema_from_to_pyarrow_schema():
assert restored_schema == pyarrow_schema


@pytest.mark.parametrize("lazy", [False, True])
def test_schema_infer_polars_dataframe(lazy):
    """``sch.infer`` reads the schema of an eager or lazy polars frame."""
    pl = pytest.importorskip("polars")

    columns = {"a": [1, 2, 3], "b": ["a", "b", "c"], "c": [True, False, True]}
    polars_types = {"a": pl.Int64, "b": pl.Utf8, "c": pl.Boolean}
    frame = pl.DataFrame(columns, schema=polars_types)
    if lazy:
        # Exercise the LazyFrame registration as well as the DataFrame one.
        frame = frame.lazy()

    inferred = sch.infer(frame)
    expected = sch.Schema({"a": dt.int64, "b": dt.string, "c": dt.boolean})
    assert inferred == expected


def test_schema_from_to_polars_schema():
    """A polars schema converts to the ibis schema and back again."""
    pl = pytest.importorskip("polars")

    polars_schema = {"a": pl.Int64, "b": pl.Utf8, "c": pl.Boolean}
    ibis_schema = sch.Schema({"a": dt.int64, "b": dt.string, "c": dt.boolean})

    # polars -> ibis
    from_polars = sch.Schema.from_polars(polars_schema)
    assert from_polars == ibis_schema

    # ibis -> polars
    to_polars = ibis_schema.to_polars()
    assert to_polars == polars_schema


def test_schema_from_to_numpy_dtypes():
numpy_dtypes = [
("a", np.dtype("int64")),
Expand Down

0 comments on commit 73add93

Please sign in to comment.