Skip to content

Commit

Permalink
feat(tables): cast table using schema
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Mar 8, 2023
1 parent 3e2c364 commit 96ce109
Show file tree
Hide file tree
Showing 4 changed files with 134 additions and 12 deletions.
23 changes: 22 additions & 1 deletion ibis/common/typing.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,16 @@
from __future__ import annotations

import sys
from typing import Any, ForwardRef
from typing import (
TYPE_CHECKING,
Any,
ForwardRef,
Iterable,
Mapping,
Tuple,
TypeVar,
Union,
)

import toolz

Expand Down Expand Up @@ -36,3 +45,15 @@ def evaluate_typehint(hint, module_name) -> Any:
return hint._evaluate(globalns, locals())
else:
return hint


# Evaluated only by static type checkers: `typing.TYPE_CHECKING` is False at
# runtime, so the two ibis modules below are not imported when this module
# is loaded normally.
if TYPE_CHECKING:
import ibis.expr.datatypes as dt
import ibis.expr.schema as sch

# Constrained TypeVar naming the accepted schema-like inputs:
#   - an iterable of (name, type) pairs, where type is a str or a DataType
#   - a mapping from name to a str or a DataType
#   - a Schema instance
# NOTE(review): presumably this definition also sits inside the TYPE_CHECKING
# guard, since it references `dt` and `sch` — indentation is not visible
# here, confirm against the real file.
SupportsSchema = TypeVar(
"SupportsSchema",
Iterable[Tuple[str, Union[str, dt.DataType]]],
Mapping[str, Union[str, dt.DataType]],
sch.Schema,
)
13 changes: 3 additions & 10 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,7 @@
import functools
import operator
from pathlib import Path
from typing import TYPE_CHECKING, Any, Iterable, Mapping, NamedTuple, Sequence, TypeVar
from typing import Tuple as _Tuple
from typing import Union as _Union
from typing import TYPE_CHECKING, Any, Iterable, NamedTuple, Sequence, TypeVar

import dateutil.parser
import numpy as np
Expand Down Expand Up @@ -45,6 +43,8 @@
if TYPE_CHECKING:
import pandas as pd

from ibis.common.typing import SupportsSchema

__all__ = (
'aggregate',
'and_',
Expand Down Expand Up @@ -174,13 +174,6 @@

negate = ir.NumericValue.negate

# Constrained TypeVar describing schema-like inputs: an iterable of
# (name, str-or-DataType) pairs, a mapping from name to DataType, or a Schema.
SupportsSchema = TypeVar(
"SupportsSchema",
Iterable[_Tuple[str, _Union[str, dt.DataType]]],
Mapping[str, dt.DataType],
sch.Schema,
)


def _deferred(fn):
@functools.wraps(fn)
Expand Down
92 changes: 91 additions & 1 deletion ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,16 @@
import ibis.common.exceptions as com
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.schema as sch
from ibis import util
from ibis.expr.deferred import Deferred
from ibis.expr.types.core import Expr, _FixedTextJupyterMixin

if TYPE_CHECKING:
import pandas as pd

import ibis.expr.schema as sch
import ibis.expr.types as ir
from ibis.common.typing import SupportsSchema
from ibis.expr.selectors import IfAnyAll, Selector
from ibis.expr.types.groupby import GroupedTable

Expand Down Expand Up @@ -143,6 +144,95 @@ def __contains__(self, name: str) -> bool:
"""
return name in self.schema()

def cast(self, schema: SupportsSchema) -> Table:
    """Return this table with the columns named in `schema` cast to new types.

    !!! note "If you need to cast columns to a single type, use [selectors](https://ibis-project.org/blog/selectors/)."

    Columns that `schema` does not mention are selected unchanged, and the
    column order of the table is preserved.

    Parameters
    ----------
    schema
        Mapping, schema or iterable of pairs to use for casting

    Returns
    -------
    Table
        Casted table

    Raises
    ------
    ibis.common.exceptions.IbisError
        If `schema` names a column that is not in the table

    Examples
    --------
    >>> import ibis
    >>> import ibis.expr.selectors as s
    >>> ibis.options.interactive = True
    >>> t = ibis.examples.penguins.fetch()
    >>> t.schema()
    ibis.Schema {
    species string
    island string
    bill_length_mm float64
    bill_depth_mm float64
    flipper_length_mm int64
    body_mass_g int64
    sex string
    year int64
    }
    >>> cols = ["body_mass_g", "bill_length_mm"]
    >>> t[cols].head()
    ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
    ┃ body_mass_g ┃ bill_length_mm ┃
    ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
    │ int64 │ float64 │
    ├─────────────┼────────────────┤
    │ 3750 │ 39.1 │
    │ 3800 │ 39.5 │
    │ 3250 │ 40.3 │
    │ ∅ │ nan │
    │ 3450 │ 36.7 │
    └─────────────┴────────────────┘

    Columns not present in the input schema will be passed through unchanged

    >>> t.columns
    ['species', 'island', 'bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g', 'sex', 'year']
    >>> expr = t.cast({"body_mass_g": "float64", "bill_length_mm": "int"})
    >>> expr.select(*cols).head()
    ┏━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┓
    ┃ body_mass_g ┃ bill_length_mm ┃
    ┡━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━┩
    │ float64 │ int64 │
    ├─────────────┼────────────────┤
    │ 3750.0 │ 39 │
    │ 3800.0 │ 40 │
    │ 3250.0 │ 40 │
    │ nan │ ∅ │
    │ 3450.0 │ 37 │
    └─────────────┴────────────────┘

    Columns that are in the input `schema` but not in the table raise an error

    >>> t.cast({"foo": "string"})
    Traceback (most recent call last):
    ...
    ibis.common.exceptions.IbisError: Cast schema has fields that are not in the table: ['foo']
    """
    # Normalise whatever schema-like object was passed into a Schema.
    target = sch.schema(schema)
    existing = self.columns

    # Reject the whole request up front if any requested name is unknown.
    missing_fields = frozenset(target.names).difference(existing)
    if missing_fields:
        raise com.IbisError(
            f"Cast schema has fields that are not in the table: {sorted(missing_fields)}"
        )

    def _project(name):
        # A column without an entry in the target schema passes through by name.
        new_type = target.get(name)
        if new_type is None:
            return name
        # Re-apply the original name so the cast column keeps its label.
        return self[name].cast(new_type).name(name)

    # Walk the table's own column order so the result keeps the same layout.
    return self.select(*map(_project, existing))

def __rich_console__(self, console, options):
from rich.text import Text

Expand Down
18 changes: 18 additions & 0 deletions ibis/tests/expr/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,3 +1702,21 @@ def test_filter_with_literal(value, api):
int_val = ibis.literal(int(value))
with pytest.raises((NotImplementedError, com.IbisTypeError)):
api(t, int_val)


def test_cast():
    """Check `Table.cast` against the equivalent `mutate` for each input form."""
    t = ibis.table(dict(a="int", b="string", c="float"), name="t")

    # Mapping input: only the named column changes type.
    assert t.cast({"a": "string"}).equals(t.mutate(a=t.a.cast("string")))

    # A name that is not in the table must be rejected. Only the raising call
    # belongs inside the context manager: anything chained after it would
    # never execute and would wrongly suggest a comparison is being made.
    with pytest.raises(
        com.IbisError, match="fields that are not in the table: .+'d'.+"
    ):
        t.cast({"d": "array<int>"})

    # Schema object input.
    assert t.cast(ibis.schema({"a": "string", "b": "int"})).equals(
        t.mutate(a=t.a.cast("string"), b=t.b.cast("int"))
    )

    # Iterable-of-pairs input.
    assert t.cast([("a", "string"), ("b", "float")]).equals(
        t.mutate(a=t.a.cast("string"), b=t.b.cast("float"))
    )

0 comments on commit 96ce109

Please sign in to comment.