Skip to content

Commit

Permalink
refactor(ux): return expression from Table.info
Browse files Browse the repository at this point in the history
BREAKING CHANGE: `Table.info` now returns an expression
  • Loading branch information
cpcloud authored and kszucs committed Mar 6, 2023
1 parent 2430f52 commit 71cc0e0
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 113 deletions.
45 changes: 22 additions & 23 deletions ibis/backends/tests/test_generic.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import contextlib
import decimal
import io
from contextlib import redirect_stdout
from operator import invert, methodcaller, neg

import numpy as np
import pandas as pd
import pytest
import sqlalchemy.exc
import sqlalchemy as sa
import toolz
from pytest import param

Expand Down Expand Up @@ -548,25 +546,26 @@ def test_order_by_random(alltypes):
assert not r1.equals(r2)


def check_table_info(buf, schema):
    """Assert that the text held by ``buf`` looks like a table summary.

    Parameters
    ----------
    buf
        Object whose ``getvalue()`` returns the rendered summary text.
    schema
        Object exposing ``types`` and ``names`` iterables.

    Raises
    ------
    AssertionError
        If the text lacks ``"Null"``, the class name of any element of
        ``schema.types``, or any element of ``schema.names``.
    """
    rendered = buf.getvalue()

    assert "Null" in rendered, "summary text does not mention 'Null'"

    # Check items one at a time so a failure names the missing piece, and
    # avoid shadowing the builtin `type` with the loop variable.
    for dtype in schema.types:
        type_name = dtype.__class__.__name__
        assert type_name in rendered, f"type {type_name!r} missing from summary"

    for column_name in schema.names:
        assert column_name in rendered, (
            f"column {column_name!r} missing from summary"
        )
assert all(name in info_str for name in schema.names)


def test_table_info_buf(alltypes):
    """Write ``alltypes.info`` into an explicit buffer and validate the text."""
    sink = io.StringIO()
    table_schema = alltypes.schema()
    alltypes.info(buf=sink)
    check_table_info(sink, table_schema)


def test_table_info_no_buf(alltypes):
    """Call ``alltypes.info()`` with no buffer and validate what reached stdout."""
    captured = io.StringIO()
    # Anything written to stdout while the block runs lands in `captured`.
    with redirect_stdout(captured):
        alltypes.info()
    check_table_info(captured, alltypes.schema())
@pytest.mark.notyet(
    ["druid"],
    raises=sa.exc.ProgrammingError,
    reason="Druid only supports trivial unions",
)
@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
def test_table_info(alltypes):
    """Check the columns and row order of the expression from ``Table.info``."""
    summary_columns = [
        "name",
        "type",
        "nullable",
        "nulls",
        "non_nulls",
        "null_frac",
        "pos",
    ]

    info_expr = alltypes.info()
    result = info_expr.execute()

    # The `name` column of the result must list the table's columns in order.
    assert alltypes.columns == list(result.name)
    # The expression and the executed result must agree on the summary columns.
    assert info_expr.columns == summary_columns
    assert info_expr.columns == list(result.columns)


@pytest.mark.parametrize(
Expand Down Expand Up @@ -750,7 +749,7 @@ def test_select_filter_select(backend, alltypes, df):


@pytest.mark.notimpl(["datafusion"], raises=com.OperationNotDefinedError)
@pytest.mark.broken(["mssql"], raises=sqlalchemy.exc.OperationalError)
@pytest.mark.broken(["mssql"], raises=sa.exc.OperationalError)
def test_between(backend, alltypes, df):
expr = alltypes.double_col.between(5, 10)
result = expr.execute().rename("double_col")
Expand Down
129 changes: 39 additions & 90 deletions ibis/expr/types/relations.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,9 @@
import functools
import itertools
import re
import sys
import warnings
from keyword import iskeyword
from typing import IO, TYPE_CHECKING, Callable, Iterable, Literal, Mapping, Sequence
from typing import TYPE_CHECKING, Callable, Iterable, Literal, Mapping, Sequence

from public import public

Expand Down Expand Up @@ -1936,105 +1935,55 @@ def unpack(self, *columns: str) -> Table:
result_columns.append(column)
return self[result_columns]

def info(self, buf: IO[str] | None = None) -> None:
"""Show summary information about a table.
Parameters
----------
buf
A writable buffer, defaults to stdout
def info(self) -> Table:
"""Return summary information about a table.
Returns
-------
None
This method prints to a buffer (stdout by default) and returns nothing.
Table
Summary of `self`
Examples
--------
>>> import ibis
>>> ibis.options.interactive = True
>>> t = ibis.examples.penguins.fetch(table_name="penguins")
>>> t
┏━━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━┳━━━┓
┃ species ┃ island ┃ bill_length_mm ┃ bill_depth_mm ┃ flipper_length_mm ┃ … ┃
┡━━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━╇━━━┩
│ string │ string │ float64 │ float64 │ int64 │ … │
├─────────┼───────────┼────────────────┼───────────────┼───────────────────┼───┤
│ Adelie │ Torgersen │ 39.1 │ 18.7 │ 181 │ … │
│ Adelie │ Torgersen │ 39.5 │ 17.4 │ 186 │ … │
│ Adelie │ Torgersen │ 40.3 │ 18.0 │ 195 │ … │
│ Adelie │ Torgersen │ nan │ nan │ ∅ │ … │
│ Adelie │ Torgersen │ 36.7 │ 19.3 │ 193 │ … │
│ Adelie │ Torgersen │ 39.3 │ 20.6 │ 190 │ … │
│ Adelie │ Torgersen │ 38.9 │ 17.8 │ 181 │ … │
│ Adelie │ Torgersen │ 39.2 │ 19.6 │ 195 │ … │
│ Adelie │ Torgersen │ 34.1 │ 18.1 │ 193 │ … │
│ Adelie │ Torgersen │ 42.0 │ 20.2 │ 190 │ … │
│ … │ … │ … │ … │ … │ … │
└─────────┴───────────┴────────────────┴───────────────┴───────────────────┴───┘
Default implementation prints to stdout
>>> t.info() # doctest: +SKIP
Summary of penguins
344 rows
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━┓
┃ Name ┃ Type ┃ # Nulls ┃ % Nulls ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━┩
│ species │ String(nullable=True) │ 0 │ 0.00 │
│ island │ String(nullable=True) │ 0 │ 0.00 │
│ bill_length_mm │ Float64(nullable=True) │ 2 │ 0.58 │
│ bill_depth_mm │ Float64(nullable=True) │ 2 │ 0.58 │
│ flipper_length_mm │ Int64(nullable=True) │ 2 │ 0.58 │
│ body_mass_g │ Int64(nullable=True) │ 2 │ 0.58 │
│ sex │ String(nullable=True) │ 11 │ 3.20 │
│ year │ Int64(nullable=True) │ 0 │ 0.00 │
└───────────────────┴────────────────────────┴─────────┴─────────┘
Store the info into a buffer
>>> import io
>>> buf = io.StringIO()
>>> t.info(buf=buf)
>>> "Summary of penguins" in buf.getvalue()
True
>>> t.info()
┏━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━┳━━━━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┳━━━┓
┃ name ┃ type ┃ nullable ┃ nulls ┃ non_nulls ┃ null_frac ┃ … ┃
┡━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━╇━━━━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━╇━━━┩
│ string │ string │ boolean │ int64 │ int64 │ float64 │ … │
├───────────────────┼─────────┼──────────┼───────┼───────────┼───────────┼───┤
│ species │ string │ True │ 0 │ 344 │ 0.000000 │ … │
│ island │ string │ True │ 0 │ 344 │ 0.000000 │ … │
│ bill_length_mm │ float64 │ True │ 2 │ 342 │ 0.005814 │ … │
│ bill_depth_mm │ float64 │ True │ 2 │ 342 │ 0.005814 │ … │
│ flipper_length_mm │ int64 │ True │ 2 │ 342 │ 0.005814 │ … │
│ body_mass_g │ int64 │ True │ 2 │ 342 │ 0.005814 │ … │
│ sex │ string │ True │ 11 │ 333 │ 0.031977 │ … │
│ year │ int64 │ True │ 0 │ 344 │ 0.000000 │ … │
└───────────────────┴─────────┴──────────┴───────┴───────────┴───────────┴───┘
"""
import rich
import rich.table
from rich.pretty import Pretty

if buf is None:
buf = sys.stdout

metrics = [self[col].count().name(col) for col in self.columns]
metrics.append(self.count().name("nrows"))

schema = self.schema()

*items, (_, n) = self.aggregate(metrics).execute().squeeze().items()

op = self.op()
title = getattr(op, "name", type(op).__name__)

table = rich.table.Table(title=f"Summary of {title}\n{n:d} rows")

table.add_column("Name", justify="left")
table.add_column("Type", justify="left")
table.add_column("# Nulls", justify="right")
table.add_column("% Nulls", justify="right")

for column, non_nulls in items:
table.add_row(
column,
Pretty(schema[column]),
str(n - non_nulls),
f"{100 * (1.0 - non_nulls / n):>3.2f}",
from ibis import literal as lit

aggs = []

for pos, colname in enumerate(self.columns):
col = self[colname]
typ = col.type()
agg = self.select(
isna=ibis.case().when(col.isnull(), 1).else_(0).end()
).agg(
name=lit(colname),
type=lit(str(typ)),
nullable=lit(int(typ.nullable)).cast("bool"),
nulls=lambda t: t.isna.sum(),
non_nulls=lambda t: (1 - t.isna).sum(),
null_frac=lambda t: t.isna.mean(),
pos=lit(pos),
)

console = rich.get_console()
with console.capture() as capture:
console.print(table)
buf.write(capture.get())
aggs.append(agg)
return ibis.union(*aggs).order_by(ibis.asc("pos"))

def set_column(self, name: str, expr: ir.Value) -> Table:
"""Replace an existing column with a new expression.
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ filterwarnings = [
"ignore:'cgi' is deprecated and slated for removal in Python 3\\.13:DeprecationWarning",
# warnings from google's use of pkg_resources
"ignore:pkg_resources is deprecated as an API:DeprecationWarning",
# sqlalchemy warns about mysql's inability to cast to bool;
# this has no effect on ibis's output because we convert types after
# execution
"ignore:Datatype BOOL does not support CAST on MySQL/MariaDB; the cast will be skipped:sqlalchemy.exc.SAWarning"
]
empty_parameter_set_mark = "fail_at_collect"
markers = [
Expand Down

0 comments on commit 71cc0e0

Please sign in to comment.