Skip to content

Commit

Permalink
refactor: add polars format
Browse files Browse the repository at this point in the history
  • Loading branch information
jcrist committed Feb 27, 2024
1 parent a18ab29 commit 40ada17
Show file tree
Hide file tree
Showing 5 changed files with 173 additions and 153 deletions.
11 changes: 4 additions & 7 deletions ibis/backends/polars/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
rewrite_join,
)
from ibis.backends.polars.compiler import translate
from ibis.backends.polars.datatypes import dtype_to_polars, schema_from_polars
from ibis.backends.sql.dialects import Polars
from ibis.formats.polars import PolarsSchema
from ibis.util import gen_name, normalize_filename

if TYPE_CHECKING:
Expand Down Expand Up @@ -70,7 +70,7 @@ def list_tables(self, like=None, database=None):
return self._filter_with_like(list(self._tables.keys()), like)

def table(self, name: str, _schema: sch.Schema | None = None) -> ir.Table:
schema = schema_from_polars(self._tables[name].schema)
schema = PolarsSchema.to_ibis(self._tables[name].schema)
return ops.DatabaseTable(name, schema, self).to_expr()

def register(
Expand Down Expand Up @@ -342,10 +342,7 @@ def create_table(
overwrite: bool = False,
) -> ir.Table:
if schema is not None and obj is None:
obj = pl.LazyFrame(
[],
schema={name: dtype_to_polars(dtype) for name, dtype in schema.items()},
)
obj = pl.LazyFrame([], schema=PolarsSchema.from_ibis(schema))

if database is not None:
raise com.IbisError(
Expand Down Expand Up @@ -413,7 +410,7 @@ def _get_sql_string_view_schema(self, name, table, query) -> sch.Schema:
raise NotImplementedError("table.sql() not yet supported in polars")

def _get_schema_using_query(self, query: str) -> sch.Schema:
return schema_from_polars(self._context.execute(query).schema)
return PolarsSchema.to_ibis(self._context.execute(query).schema)

def execute(
self,
Expand Down
40 changes: 21 additions & 19 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
from ibis.backends.pandas.rewrites import PandasAsofJoin, PandasJoin, PandasRename
from ibis.backends.polars.datatypes import dtype_to_polars, schema_from_polars
from ibis.expr.operations.udf import InputType
from ibis.formats.polars import PolarsSchema, PolarsType
from ibis.util import gen_name


Expand Down Expand Up @@ -68,13 +68,13 @@ def dummy_table(op, **kw):
@translate.register(ops.InMemoryTable)
def pandas_in_memory_table(op, **_):
lf = pl.from_pandas(op.data.to_frame()).lazy()
schema = schema_from_polars(lf.schema)
schema = PolarsSchema.to_ibis(lf.schema)

columns = []
for name, current_dtype in schema.items():
desired_dtype = op.schema[name]
if current_dtype != desired_dtype:
typ = dtype_to_polars(desired_dtype)
typ = PolarsType.from_ibis(desired_dtype)
columns.append(pl.col(name).cast(typ))

if columns:
Expand All @@ -101,12 +101,12 @@ def literal(op, **_):

if dtype.is_array():
value = pl.Series("", value)
typ = dtype_to_polars(dtype)
typ = PolarsType.from_ibis(dtype)
val = pl.lit(value, dtype=typ)
return val.implode()
elif dtype.is_struct():
values = [
pl.lit(v, dtype=dtype_to_polars(dtype[k])).alias(k)
pl.lit(v, dtype=PolarsType.from_ibis(dtype[k])).alias(k)
for k, v in value.items()
]
return pl.struct(values)
Expand All @@ -117,7 +117,7 @@ def literal(op, **_):
elif dtype.is_binary():
return pl.lit(value)
else:
typ = dtype_to_polars(dtype)
typ = PolarsType.from_ibis(dtype)
return pl.lit(op.value, dtype=typ)


Expand Down Expand Up @@ -179,7 +179,7 @@ def _cast(op, strict=True, **kw):
return arg.dt.truncate("1s")
return arg

typ = dtype_to_polars(to)
typ = PolarsType.from_ibis(to)
return arg.cast(typ, strict=strict)


Expand Down Expand Up @@ -509,14 +509,14 @@ def in_values(op, **kw):
@translate.register(ops.StringLength)
def string_length(op, **kw):
arg = translate(op.arg, **kw)
typ = dtype_to_polars(op.dtype)
typ = PolarsType.from_ibis(op.dtype)
return arg.str.len_bytes().cast(typ)


@translate.register(ops.Capitalize)
def capitalize(op, **kw):
arg = translate(op.arg, **kw)
typ = dtype_to_polars(op.dtype)
typ = PolarsType.from_ibis(op.dtype)
first = arg.str.slice(0, 1).str.to_uppercase()
rest = arg.str.slice(1, None).str.to_lowercase()
return (first + rest).cast(typ)
Expand Down Expand Up @@ -652,7 +652,7 @@ def str_right(op, **kw):
@translate.register(ops.Round)
def round(op, **kw):
arg = translate(op.arg, **kw)
typ = dtype_to_polars(op.dtype)
typ = PolarsType.from_ibis(op.dtype)
digits = _literal_value(op.digits)
return arg.round(digits or 0).cast(typ)

Expand Down Expand Up @@ -705,7 +705,7 @@ def repeat(op, **kw):
@translate.register(ops.Sign)
def sign(op, **kw):
arg = translate(op.arg, **kw)
typ = dtype_to_polars(op.dtype)
typ = PolarsType.from_ibis(op.dtype)
return arg.sign().cast(typ)


Expand Down Expand Up @@ -765,7 +765,7 @@ def reduction(op, **kw):
first, *rest = args
method = operator.methodcaller(agg, *rest)
return method(first.filter(reduce(operator.and_, predicates))).cast(
dtype_to_polars(op.dtype)
PolarsType.from_ibis(op.dtype)
)


Expand Down Expand Up @@ -815,7 +815,7 @@ def count_star(op, **kw):
result = pl.len()
except AttributeError:
result = pl.count()
return result.cast(dtype_to_polars(op.dtype))
return result.cast(PolarsType.from_ibis(op.dtype))


@translate.register(ops.TimestampNow)
Expand Down Expand Up @@ -1109,7 +1109,7 @@ def bitwise_binops(op, **kw):
else:
result = pl.map_batches([left, right], lambda cols: ufunc(cols[0], cols[1]))

return result.cast(dtype_to_polars(op.dtype))
return result.cast(PolarsType.from_ibis(op.dtype))


@translate.register(ops.BitwiseNot)
Expand Down Expand Up @@ -1149,7 +1149,7 @@ def binop(op, **kw):
@translate.register(ops.ElementWiseVectorizedUDF)
def elementwise_udf(op, **kw):
func_args = [translate(arg, **kw) for arg in op.func_args]
return_type = dtype_to_polars(op.return_type)
return_type = PolarsType.from_ibis(op.return_type)

return pl.map_batches(
func_args, lambda args: op.func(*args), return_dtype=return_type
Expand Down Expand Up @@ -1252,7 +1252,7 @@ def execute_count_distinct_star(op, **kw):
# -> convert back to a polars series
InputType.PYTHON: lambda func, dtype, args: pl.Series(
map(func, *(arg.to_list() for arg in args)),
dtype=dtype_to_polars(dtype),
dtype=PolarsType.from_ibis(dtype),
),
# Convert polars series into a pyarrow array
# -> invoke the function on the pyarrow array
Expand All @@ -1272,7 +1272,7 @@ def execute_scalar_udf(op, **kw):
return pl.map_batches(
exprs=[translate(arg, **kw) for arg in op.args],
function=partial(_UDF_INVOKERS[input_type], op.__func__, dtype),
return_dtype=dtype_to_polars(dtype),
return_dtype=PolarsType.from_ibis(dtype),
)
elif input_type == InputType.BUILTIN:
first, *rest = map(translate, op.args)
Expand Down Expand Up @@ -1307,7 +1307,9 @@ def split(args):
arg = translate(op.arg, **kw)
pattern = translate(op.pattern, **kw)
return pl.map_batches(
exprs=(arg, pattern), function=split, return_dtype=dtype_to_polars(op.dtype)
exprs=(arg, pattern),
function=split,
return_dtype=PolarsType.from_ibis(op.dtype),
)


Expand All @@ -1319,7 +1321,7 @@ def execute_integer_range(op, **kw):
)
step = op.step.value

dtype = dtype_to_polars(op.dtype)
dtype = PolarsType.from_ibis(op.dtype)
empty = pl.int_ranges(0, 0, dtype=dtype)

if step == 0:
Expand Down
123 changes: 0 additions & 123 deletions ibis/backends/polars/datatypes.py

This file was deleted.

Loading

0 comments on commit 40ada17

Please sign in to comment.