Skip to content

Commit

Permalink
feat(date): add ibis.date(y,m,d) functionality
Browse files Browse the repository at this point in the history
ref: #386

feat(datetime): add ibis.timestamp(y,m,d,h,m,s) and ibis.time(h,m,s) functionality
  • Loading branch information
Saul Pwanson authored and cpcloud committed Mar 25, 2022
1 parent dd40652 commit 26892b6
Show file tree
Hide file tree
Showing 8 changed files with 219 additions and 31 deletions.
5 changes: 5 additions & 0 deletions ibis/backends/base/sql/alchemy/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,6 +506,11 @@ def _string_join(t, expr):
# other
ops.SortKey: _sort_key,
ops.Date: unary(lambda arg: sa.cast(arg, sa.DATE)),
ops.DateFromYMD: fixed_arity(sa.func.date, 3),
ops.TimeFromHMS: fixed_arity(sa.func.time, 3),
ops.TimestampFromYMDHMS: lambda t, expr: sa.func.make_timestamp(
*map(t.translate, expr.op().args[:6]) # ignore timezone
),
}


Expand Down
1 change: 1 addition & 0 deletions ibis/backends/base/sql/registry/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ def hash(translator, expr):
ops.Count: aggregate.reduction('count'),
ops.CountDistinct: aggregate.count_distinct,
# string operations
ops.StringConcat: fixed_arity('concat', 2),
ops.StringLength: unary('length'),
ops.StringAscii: unary('ascii'),
ops.Lowercase: unary('lower'),
Expand Down
2 changes: 2 additions & 0 deletions ibis/backends/postgres/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ def _day_of_week_name(t, expr):
ops.Round: _round,
ops.Modulus: _mod,
# dates and times
ops.DateFromYMD: fixed_arity(sa.func.make_date, 3),
ops.DateTruncate: _timestamp_truncate,
ops.TimestampTruncate: _timestamp_truncate,
ops.IntervalFromInteger: _interval_from_integer,
Expand Down Expand Up @@ -706,6 +707,7 @@ def _day_of_week_name(t, expr):
ops.RandomScalar: _random,
# now is in the timezone of the server, but we want UTC
ops.TimestampNow: lambda *_: sa.func.timezone('UTC', sa.func.now()),
ops.TimeFromHMS: fixed_arity(sa.func.make_time, 3),
ops.CumulativeAll: unary(sa.func.bool_and),
ops.CumulativeAny: unary(sa.func.bool_or),
# array operations
Expand Down
27 changes: 27 additions & 0 deletions ibis/backends/sqlite/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,9 +302,33 @@ def _string_concat(t, expr):
return functools.reduce(operator.add, map(t.translate, args))


def _date_from_ymd(t, expr):
    """Translate a date-from-components operation for SQLite.

    The operation's three arguments are translated in order, formatted as a
    zero-padded ``YYYY-MM-DD`` string with ``printf``, and handed to ``date``.
    """
    year, month, day = (t.translate(arg) for arg in expr.op().args)
    iso_date = sa.func.printf('%04d-%02d-%02d', year, month, day)
    return sa.func.date(iso_date)


def _timestamp_from_ymdhms(t, expr):
    """Translate a timestamp-from-components operation for SQLite.

    Parameters
    ----------
    t
        Translator; ``t.translate`` is applied to every operation argument
        that is not ``None``.
    expr
        Expression whose operation carries year, month, day, hour, minute
        and second arguments, optionally followed by a timezone argument.

    Returns
    -------
    The ``datetime(...)`` function call applied to the ``printf``-formatted
    ``YYYY-MM-DD HH:MM:SS<tz>`` string.
    """
    y, mo, d, h, m, s, *rest = (
        t.translate(x) if x is not None else None for x in expr.op().args
    )
    # An unset timezone shows up as a ``None`` entry rather than as a missing
    # one, so test the value too; otherwise ``None`` is sent to ``printf``
    # instead of the intended empty suffix.
    tz = rest[0] if rest and rest[0] is not None else ''
    timestr = sa.func.printf(
        '%04d-%02d-%02d %02d:%02d:%02d%s', y, mo, d, h, m, s, tz
    )
    return sa.func.datetime(timestr)


def _time_from_hms(t, expr):
    """Translate a time-from-components operation for SQLite.

    The operation's three arguments are translated in order, formatted as a
    zero-padded ``HH:MM:SS`` string with ``printf``, and handed to ``time``.
    """
    hours, minutes, seconds = (t.translate(arg) for arg in expr.op().args)
    clock = sa.func.printf('%02d:%02d:%02d', hours, minutes, seconds)
    return sa.func.time(clock)


operation_registry.update(
{
ops.Cast: _cast,
ops.DateFromYMD: _date_from_ymd,
ops.Substring: _substr,
ops.StrRight: _string_right,
ops.StringFind: _string_find,
Expand All @@ -313,6 +337,9 @@ def _string_concat(t, expr):
ops.Least: varargs(sa.func.min),
ops.Greatest: varargs(sa.func.max),
ops.IfNull: fixed_arity(sa.func.ifnull, 2),
ops.DateFromYMD: _date_from_ymd,
ops.TimeFromHMS: _time_from_hms,
ops.TimestampFromYMDHMS: _timestamp_from_ymdhms,
ops.DateTruncate: _truncate(sa.func.date),
ops.Date: unary(sa.func.date),
ops.TimestampTruncate: _truncate(sa.func.datetime),
Expand Down
72 changes: 72 additions & 0 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,3 +622,75 @@ def test_now_from_projection(backend, alltypes):
now = pd.Timestamp('now')
year_expected = pd.Series([now.year] * n, name='ts')
tm.assert_series_equal(ts.dt.year, year_expected)


@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"])
@pytest.mark.notyet(["clickhouse", "impala"])
def test_date_literal(con):
    """A date built from integer components executes to that day."""
    executed = con.execute(ibis.date(2022, 2, 4))
    formatted = executed.strftime('%Y-%m-%d')
    assert formatted == '2022-02-04'


@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"])
@pytest.mark.notyet(["clickhouse", "impala"])
def test_timestamp_literal(con):
    """A timestamp built from six integer components executes correctly."""
    executed = con.execute(ibis.timestamp(2022, 2, 4, 16, 20, 0))
    # A result that is already a string is compared as-is; anything else is
    # formatted first.
    actual = (
        executed
        if isinstance(executed, str)
        else executed.strftime('%Y-%m-%d %H:%M:%S%Z')
    )
    assert actual == '2022-02-04 16:20:00'


@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"])
@pytest.mark.notyet(["clickhouse", "impala"])
def test_time_literal(con):
    """A time built from integer hour/minute/second executes correctly."""
    executed = con.execute(ibis.time(16, 20, 0))
    # A result that is already a string is compared as-is; anything else is
    # formatted first.
    actual = (
        executed
        if isinstance(executed, str)
        else executed.strftime('%H:%M:%S')
    )
    assert actual == '16:20:00'


@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"])
@pytest.mark.notyet(["clickhouse", "impala"])
def test_date_column_from_ymd(con, alltypes, df):
    """Rebuilding a date from a column's year/month/day matches pandas."""
    ts = alltypes.timestamp_col
    rebuilt = ibis.date(ts.year(), ts.month(), ts.day())
    projection = alltypes[(rebuilt.name('timestamp_col'),)]
    result = con.execute(projection)

    expected = df.timestamp_col.dt.date.astype('datetime64[ns]')
    tm.assert_series_equal(expected, result.timestamp_col)


@pytest.mark.notimpl(["datafusion", "impala"])
def test_date_scalar_from_iso(con):
    """``ibis.date`` applied to a string literal expression yields that day."""
    iso_literal = ibis.literal('2022-02-24')
    result = con.execute(ibis.date(iso_literal))
    assert result.strftime('%Y-%m-%d') == '2022-02-24'


@pytest.mark.notimpl(["datafusion", "impala", "pyspark"])
def test_date_column_from_iso(con, alltypes, df):
    """``ibis.date`` applied to a string column expression matches pandas."""
    # Backend side: assemble "<year>-<zero-padded month>-13" and convert it.
    year_text = alltypes.year.cast('string')
    month_text = alltypes.month.cast('string').lpad(2, '0')
    expr = ibis.date(year_text + '-' + month_text + '-13')
    result = con.execute(expr)

    # pandas side: the same string, built directly from the dataframe.
    year_col = df.year.astype(str)
    month_col = df.month.astype(str).str.rjust(2, '0')
    golden = year_col + '-' + month_col + '-13'

    actual = result.dt.strftime('%Y-%m-%d')
    tm.assert_series_equal(golden.rename('tmp'), actual.rename('tmp'))
109 changes: 78 additions & 31 deletions ibis/expr/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,11 @@
from __future__ import annotations

import datetime
import numbers
import functools
from typing import Iterable, Mapping, Sequence, TypeVar

import dateutil.parser
import numpy as np
import pandas as pd

import ibis.expr.builders as bl
Expand Down Expand Up @@ -371,8 +372,10 @@ def asc(expr: ir.ColumnExpr | str) -> ir.SortExpr | ops.DeferredSortKey:
return ops.SortKey(expr).to_expr()


@functools.singledispatch
def timestamp(
value: str | numbers.Integral,
value,
*args,
timezone: str | None = None,
) -> ir.TimestampScalar:
"""Construct a timestamp literal if `value` is coercible to a timestamp.
Expand All @@ -389,23 +392,45 @@ def timestamp(
TimestampScalar
A timestamp expression
"""
if isinstance(value, str):
try:
value = pd.Timestamp(value, tz=timezone)
except pd.errors.OutOfBoundsDatetime:
value = dateutil.parser.parse(value)
if isinstance(value, numbers.Integral):
raise TypeError(
(
"Passing an integer to ibis.timestamp is not supported. Use "
"ibis.literal({value}).to_timestamp() to create a timestamp "
"expression from an integer."
).format(value=value)
)
raise NotImplementedError(f'cannot convert {type(value)} to timestamp')


@timestamp.register(np.integer)
@timestamp.register(np.floating)
@timestamp.register(int)
@timestamp.register(float)
def _(value, *args, timezone: str | None = None) -> ir.TimestampScalar:
    """Build a timestamp expression from numeric components.

    ``value`` and the extra positional ``args`` are forwarded unchanged to
    ``ops.TimestampFromYMDHMS``.

    Raises
    ------
    NotImplementedError
        If a truthy ``timezone`` is given.
    TypeError
        If only a single numeric value is given.
    """
    if timezone:
        raise NotImplementedError('timestamp timezone not implemented')

    if args:
        # Several components: hand them straight to the operation node.
        return ops.TimestampFromYMDHMS(value, *args).to_expr()

    # A lone number is rejected; the message points at the literal-based API.
    raise TypeError(f"Use ibis.literal({value}).to_timestamp")


@timestamp.register(pd.Timestamp)
def _(value, timezone: str | None = None) -> ir.TimestampScalar:
    """Create a timestamp literal from a ``pd.Timestamp`` value."""
    timestamp_type = dt.Timestamp(timezone=timezone)
    return literal(value, type=timestamp_type)


def date(value: str) -> ir.DateScalar:
@timestamp.register(datetime.datetime)
def _(value, timezone: str | None = None) -> ir.TimestampScalar:
    """Create a timestamp literal from a ``datetime.datetime`` value."""
    timestamp_type = dt.Timestamp(timezone=timezone)
    return literal(value, type=timestamp_type)


@timestamp.register(str)
def _(value: str, timezone: str | None = None) -> ir.TimestampScalar:
    """Create a timestamp literal by parsing a string.

    The string is parsed with ``pd.Timestamp``; if pandas reports the value
    as out of bounds, ``dateutil.parser.parse`` is used instead.
    """
    try:
        parsed = pd.Timestamp(value, tz=timezone)
    except pd.errors.OutOfBoundsDatetime:
        parsed = dateutil.parser.parse(value)
    timestamp_type = dt.Timestamp(timezone=timezone)
    return literal(parsed, type=timestamp_type)


@functools.singledispatch
def date(value) -> DateValue:
"""Return a date literal if `value` is coercible to a date.
Parameters
Expand All @@ -418,29 +443,51 @@ def date(value: str) -> ir.DateScalar:
DateScalar
A date expression
"""
if isinstance(value, str):
value = pd.to_datetime(value).date()
raise NotImplementedError()


@date.register(str)
def _(value: str) -> ir.DateScalar:
    """Create a date literal by parsing a string with pandas."""
    parsed_day = pd.to_datetime(value).date()
    return literal(parsed_day, type=dt.date)


@date.register(pd.Timestamp)
def _(value) -> ir.DateScalar:
    """Create a date literal from a ``pd.Timestamp`` value."""
    date_literal = literal(value, type=dt.date)
    return date_literal


def time(value: str) -> ir.TimeScalar:
"""Return a time literal if `value` is coercible to a time.
@date.register(IntegerColumn)
@date.register(int)
def _(year, month, day) -> ir.DateScalar:
    """Build a date expression from year, month and day components."""
    node = ops.DateFromYMD(year, month, day)
    return node.to_expr()

Parameters
----------
value
Time string

Returns
-------
TimeScalar
A time expression
"""
if isinstance(value, str):
value = pd.to_datetime(value).time()
@date.register(StringValue)
def _(value: StringValue) -> DateValue:
    """Convert a string expression to a date by casting it."""
    as_date = value.cast(dt.date)
    return as_date


@functools.singledispatch
def time(value) -> TimeValue:
    """Return a time literal for `value`.

    This is the fallback used when no overload is registered for the type
    of `value`; the value is passed to ``literal`` with the time type.
    """
    time_literal = literal(value, type=dt.time)
    return time_literal


@time.register(str)
def _(value: str) -> ir.TimeScalar:
    """Create a time literal by parsing a string with pandas."""
    parsed_clock = pd.to_datetime(value).time()
    return literal(parsed_clock, type=dt.time)


@time.register(IntegerColumn)
@time.register(int)
def _(hours, mins, secs) -> ir.TimeScalar:
    """Build a time expression from hour, minute and second components."""
    node = ops.TimeFromHMS(hours, mins, secs)
    return node.to_expr()


@time.register(StringValue)
def _(value: StringValue) -> TimeValue:
    """Convert a string expression to a time by casting it."""
    as_time = value.cast(dt.time)
    return as_time


def interval(
value: int | datetime.timedelta | None = None,
unit: str = 's',
Expand Down
28 changes: 28 additions & 0 deletions ibis/expr/operations/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,34 @@ class Date(UnaryOp):
output_type = rlz.shape_like('arg', dt.date)


@public
class DateFromYMD(ValueOp):
    """Operation that builds a date from year, month and day arguments."""

    # Declaration order is the positional argument order callers use:
    # (year, month, day). Each argument is validated by ``rlz.integer``.
    year = rlz.integer
    month = rlz.integer
    day = rlz.integer
    # The result has the date type; its shape is derived from the arguments
    # by ``rlz.shape_like`` (presumably scalar vs. column -- confirm there).
    output_type = rlz.shape_like('args', dt.date)


@public
class TimeFromHMS(ValueOp):
    """Operation that builds a time from hour, minute and second arguments."""

    # Declaration order is the positional argument order callers use:
    # (hours, minutes, seconds). Each is validated by ``rlz.integer``.
    hours = rlz.integer
    minutes = rlz.integer
    seconds = rlz.integer
    # The result has the time type; its shape is derived from the arguments
    # by ``rlz.shape_like`` (presumably scalar vs. column -- confirm there).
    output_type = rlz.shape_like('args', dt.time)


@public
class TimestampFromYMDHMS(ValueOp):
    """Operation that builds a timestamp from date and time components."""

    # Declaration order is the positional argument order callers use:
    # (year, month, day, hours, minutes, seconds[, timezone]).
    year = rlz.integer
    month = rlz.integer
    day = rlz.integer
    hours = rlz.integer
    minutes = rlz.integer
    seconds = rlz.integer
    # The only optional argument; when given it must satisfy ``rlz.string``.
    timezone = rlz.optional(rlz.string)
    # The result has the timestamp type; its shape is derived from the
    # arguments by ``rlz.shape_like`` (presumably scalar vs. column --
    # confirm there).
    output_type = rlz.shape_like('args', dt.timestamp)


@public
class TimestampFromUNIX(ValueOp):
arg = rlz.any
Expand Down
6 changes: 6 additions & 0 deletions ibis/tests/expr/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -705,3 +705,9 @@ def test_time_truncate(table, operand, unit):
expr = operand(table).truncate(unit)
assert isinstance(expr, ir.TimeValue)
assert isinstance(expr.op(), ops.TimeTruncate)


def test_date_time_literals():
    """Integer-component constructors yield date, time and timestamp types."""
    date_expr = ibis.date(2022, 2, 4)
    assert date_expr.type() == dt.date

    time_expr = ibis.time(16, 20, 0)
    assert time_expr.type() == dt.time

    timestamp_expr = ibis.timestamp(2022, 2, 4, 16, 20, 0)
    assert timestamp_expr.type() == dt.timestamp

0 comments on commit 26892b6

Please sign in to comment.