From 57f999960eb7d80b2a88d2b3e744fc7e14e7bb97 Mon Sep 17 00:00:00 2001 From: Saul Pwanson Date: Fri, 4 Feb 2022 15:54:57 -0800 Subject: [PATCH] feat(date): add ibis.date(y,m,d) functionality ref: #386 feat(datetime): add ibis.time(h,m,s) and ibis.timestamp(y,m,d,h,m,s) functionality --- ibis/backends/base/sql/alchemy/registry.py | 3 + ibis/backends/base/sql/registry/main.py | 1 + ibis/backends/duckdb/registry.py | 1 + ibis/backends/postgres/registry.py | 2 + ibis/backends/sqlite/registry.py | 27 +++++ ibis/backends/tests/test_temporal.py | 72 +++++++++++++ ibis/expr/api.py | 118 +++++++++++++++------ ibis/expr/operations/temporal.py | 39 +++++++ ibis/tests/expr/test_temporal.py | 6 ++ 9 files changed, 238 insertions(+), 31 deletions(-) diff --git a/ibis/backends/base/sql/alchemy/registry.py b/ibis/backends/base/sql/alchemy/registry.py index 71377c42580da..bbeb098aadfac 100644 --- a/ibis/backends/base/sql/alchemy/registry.py +++ b/ibis/backends/base/sql/alchemy/registry.py @@ -506,6 +506,9 @@ def _string_join(t, expr): # other ops.SortKey: _sort_key, ops.Date: unary(lambda arg: sa.cast(arg, sa.DATE)), + ops.DateFromYMD: fixed_arity(sa.func.date, 3), + ops.TimeFromHMS: varargs(sa.func.time), + ops.TimestampFromYMDHMS: varargs(sa.func.make_timestamp), } diff --git a/ibis/backends/base/sql/registry/main.py b/ibis/backends/base/sql/registry/main.py index cfcfd4784feb1..ca9124c884bbf 100644 --- a/ibis/backends/base/sql/registry/main.py +++ b/ibis/backends/base/sql/registry/main.py @@ -277,6 +277,7 @@ def hash(translator, expr): ops.Count: aggregate.reduction('count'), ops.CountDistinct: aggregate.count_distinct, # string operations + ops.StringConcat: fixed_arity('concat', 2), ops.StringLength: unary('length'), ops.StringAscii: unary('ascii'), ops.Lowercase: unary('lower'), diff --git a/ibis/backends/duckdb/registry.py b/ibis/backends/duckdb/registry.py index a3cbbaf4edbbd..757d721a8b278 100644 --- a/ibis/backends/duckdb/registry.py +++ b/ibis/backends/duckdb/registry.py @@ -160,6 +160,7 @@ def 
_regex_extract(t, expr): ops.TableColumn: _table_column, ops.TimestampDiff: fixed_arity('age', 2), ops.TimestampFromUNIX: _timestamp_from_unix, + ops.DateFromYMD: fixed_arity('make_date', 3), ops.Translate: fixed_arity('replace', 3), ops.TimestampNow: fixed_arity('now', 0), ops.RegexExtract: _regex_extract, diff --git a/ibis/backends/postgres/registry.py b/ibis/backends/postgres/registry.py index a945989eca27c..cd2f00c26fa37 100644 --- a/ibis/backends/postgres/registry.py +++ b/ibis/backends/postgres/registry.py @@ -674,6 +674,7 @@ def _day_of_week_name(t, expr): ops.Round: _round, ops.Modulus: _mod, # dates and times + ops.DateFromYMD: fixed_arity(sa.func.make_date, 3), ops.DateTruncate: _timestamp_truncate, ops.TimestampTruncate: _timestamp_truncate, ops.IntervalFromInteger: _interval_from_integer, @@ -706,6 +707,7 @@ def _day_of_week_name(t, expr): ops.RandomScalar: _random, # now is in the timezone of the server, but we want UTC ops.TimestampNow: lambda *_: sa.func.timezone('UTC', sa.func.now()), + ops.TimeFromHMS: fixed_arity(sa.func.make_time, 3), ops.CumulativeAll: unary(sa.func.bool_and), ops.CumulativeAny: unary(sa.func.bool_or), # array operations diff --git a/ibis/backends/sqlite/registry.py b/ibis/backends/sqlite/registry.py index bb1d7fe686bad..91d09ef613231 100644 --- a/ibis/backends/sqlite/registry.py +++ b/ibis/backends/sqlite/registry.py @@ -302,9 +302,33 @@ def _string_concat(t, expr): return functools.reduce(operator.add, map(t.translate, args)) +def _date_from_ymd(t, expr): + y, m, d = map(t.translate, expr.op().args) + ymdstr = sa.func.printf('%04d-%02d-%02d', y, m, d) + return sa.func.date(ymdstr) + + +def _timestamp_from_ymdhms(t, expr): + y, mo, d, h, m, s, *rest = [ + t.translate(x) if x is not None else None for x in expr.op().args + ] + tz = rest[0] if rest else '' + timestr = sa.func.printf( + '%04d-%02d-%02d %02d:%02d:%02d%s', y, mo, d, h, m, s, tz + ) + return sa.func.datetime(timestr) + + +def _time_from_hms(t, expr): + h, m, s = 
map(t.translate, expr.op().args) + timestr = sa.func.printf('%02d:%02d:%02d', h, m, s) + return sa.func.time(timestr) + + operation_registry.update( { ops.Cast: _cast, + ops.DateFromYMD: _date_from_ymd, ops.Substring: _substr, ops.StrRight: _string_right, ops.StringFind: _string_find, @@ -313,6 +337,9 @@ def _string_concat(t, expr): ops.Least: varargs(sa.func.min), ops.Greatest: varargs(sa.func.max), ops.IfNull: fixed_arity(sa.func.ifnull, 2), + ops.DateFromYMD: _date_from_ymd, + ops.TimeFromHMS: _time_from_hms, + ops.TimestampFromYMDHMS: _timestamp_from_ymdhms, ops.DateTruncate: _truncate(sa.func.date), ops.Date: unary(sa.func.date), ops.TimestampTruncate: _truncate(sa.func.datetime), diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index a49aaa48e4de4..5c17b70be9f02 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -622,3 +622,75 @@ def test_now_from_projection(backend, alltypes): now = pd.Timestamp('now') year_expected = pd.Series([now.year] * n, name='ts') tm.assert_series_equal(ts.dt.year, year_expected) + + +@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"]) +@pytest.mark.notyet(["clickhouse", "impala"]) +def test_date_literal_ex(con): + expr = ibis.date(2022, 2, 4) + result = con.execute(expr) + assert result.strftime('%Y-%m-%d') == '2022-02-04' + + +@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"]) +@pytest.mark.notyet(["clickhouse", "impala"]) +def test_timestamp_literal_ex(con): + expr = ibis.timestamp(2022, 2, 4, 16, 20, 0) + result = con.execute(expr) + if not isinstance(result, str): + result = result.strftime('%Y-%m-%d %H:%M:%S%Z') + assert result == '2022-02-04 16:20:00' + + +@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"]) +@pytest.mark.notyet(["clickhouse", "impala"]) +def test_time_literal_ex(con): + expr = ibis.time(16, 20, 0) + result = con.execute(expr) + if not isinstance(result, str): 
+ result = result.strftime('%H:%M:%S') + assert result == '16:20:00' + + +@pytest.mark.notimpl(["pandas", "datafusion", "mysql", "dask", "pyspark"]) +@pytest.mark.notyet(["clickhouse", "impala"]) +def test_date_column_from_ymd(con, alltypes, df): + c = alltypes.timestamp_col + expr = ibis.date(c.year(), c.month(), c.day()) + tbl = alltypes[ + expr.name('timestamp_col'), + ] + result = con.execute(tbl) + + golden = df.timestamp_col.dt.date.astype('datetime64[ns]') + tm.assert_series_equal(golden, result.timestamp_col) + + +@pytest.mark.notimpl(["datafusion", "impala"]) +def test_date_scalar_from_iso(con): + expr = ibis.literal('2022-02-24') + expr2 = ibis.date(expr) + + result = con.execute(expr2) + assert result.strftime('%Y-%m-%d') == '2022-02-24' + + +@pytest.mark.notimpl(["datafusion", "impala", "pyspark"]) +def test_date_column_from_iso(con, alltypes, df): + expr = ( + alltypes.year.cast('string') + + '-' + + alltypes.month.cast('string').lpad(2, '0') + + '-13' + ) + expr = ibis.date(expr) + + result = con.execute(expr) + golden = ( + df.year.astype(str) + + '-' + + df.month.astype(str).str.rjust(2, '0') + + '-13' + ) + actual = result.dt.strftime('%Y-%m-%d') + tm.assert_series_equal(golden.rename('tmp'), actual.rename('tmp')) diff --git a/ibis/expr/api.py b/ibis/expr/api.py index 8be92a749c5d4..c5252c370c6c6 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -3,10 +3,12 @@ from __future__ import annotations import datetime +import functools import numbers -from typing import Iterable, Mapping, Sequence, TypeVar +from typing import Iterable, Literal, Mapping, Sequence, TypeVar import dateutil.parser +import numpy as np import pandas as pd import ibis.expr.builders as bl @@ -371,8 +373,10 @@ def asc(expr: ir.ColumnExpr | str) -> ir.SortExpr | ops.DeferredSortKey: return ops.SortKey(expr).to_expr() +@functools.singledispatch def timestamp( - value: str | numbers.Integral, + value, + *args, timezone: str | None = None, ) -> ir.TimestampScalar: """Construct a 
timestamp literal if `value` is coercible to a timestamp. @@ -389,23 +393,53 @@ def timestamp( TimestampScalar A timestamp expression """ - if isinstance(value, str): - try: - value = pd.Timestamp(value, tz=timezone) - except pd.errors.OutOfBoundsDatetime: - value = dateutil.parser.parse(value) - if isinstance(value, numbers.Integral): - raise TypeError( - ( - "Passing an integer to ibis.timestamp is not supported. Use " - "ibis.literal({value}).to_timestamp() to create a timestamp " - "expression from an integer." - ).format(value=value) - ) + raise NotImplementedError(f'cannot convert {type(value)} to timestamp') + + +@timestamp.register(np.int8) +@timestamp.register(np.int16) +@timestamp.register(np.int32) +@timestamp.register(np.int64) +@timestamp.register(np.uint8) +@timestamp.register(np.uint16) +@timestamp.register(np.uint32) +@timestamp.register(np.uint64) +@timestamp.register(np.float32) +@timestamp.register(np.float64) +@timestamp.register(int) +@timestamp.register(float) +def _(value, *args, timezone: str | None = None) -> ir.TimestampScalar: + if timezone: + raise NotImplementedError('timestamp timezone not implemented') + + if not args: # only one value + raise TypeError(f"Use ibis.literal({value}).to_timestamp") + + # pass through to datetime constructor + return ops.TimestampFromYMDHMS(value, *args).to_expr() + + +@timestamp.register(pd.Timestamp) +def _(value, timezone: str | None = None) -> ir.TimestampScalar: return literal(value, type=dt.Timestamp(timezone=timezone)) -def date(value: str) -> ir.DateScalar: +@timestamp.register(datetime.datetime) +def _(value, timezone: str | None = None) -> ir.TimestampScalar: + return literal(value, type=dt.Timestamp(timezone=timezone)) + + +@timestamp.register(str) +def _(value: str, timezone: str | None = None) -> ir.TimestampScalar: + try: + value = pd.Timestamp(value, tz=timezone) + except pd.errors.OutOfBoundsDatetime: + value = dateutil.parser.parse(value) + return literal(value, 
type=dt.Timestamp(timezone=timezone)) + + +@functools.singledispatch +def date(value) -> DateValue: """Return a date literal if `value` is coercible to a date. Parameters @@ -418,29 +452,51 @@ def date(value: str) -> ir.DateScalar: DateScalar A date expression """ - if isinstance(value, str): - value = pd.to_datetime(value).date() + raise NotImplementedError() + + +@date.register(str) +def _(value: str) -> ir.DateScalar: + return literal(pd.to_datetime(value).date(), type=dt.date) + + +@date.register(pd.Timestamp) +def _(value) -> ir.DateScalar: return literal(value, type=dt.date) -def time(value: str) -> ir.TimeScalar: - """Return a time literal if `value` is coercible to a time. +@date.register(IntegerColumn) +@date.register(int) +def _(year, month, day) -> ir.DateScalar: + return ops.DateFromYMD(year, month, day).to_expr() - Parameters - ---------- - value - Time string - Returns - ------- - TimeScalar - A time expression - """ - if isinstance(value, str): - value = pd.to_datetime(value).time() +@date.register(StringValue) +def _(value: StringValue) -> DateValue: + return value.cast(dt.date) + + +@functools.singledispatch +def time(value) -> TimeValue: return literal(value, type=dt.time) +@time.register(str) +def _(value: str) -> ir.TimeScalar: + return literal(pd.to_datetime(value).time(), type=dt.time) + + +@time.register(IntegerColumn) +@time.register(int) +def _(hours, mins, secs) -> ir.TimeScalar: + return ops.TimeFromHMS(hours, mins, secs).to_expr() + + +@time.register(StringValue) +def _(value: StringValue) -> TimeValue: + return value.cast(dt.time) + + def interval( value: int | datetime.timedelta | None = None, unit: str = 's', diff --git a/ibis/expr/operations/temporal.py b/ibis/expr/operations/temporal.py index 692573627b6d3..92a1ea880f34d 100644 --- a/ibis/expr/operations/temporal.py +++ b/ibis/expr/operations/temporal.py @@ -213,6 +213,45 @@ class Date(UnaryOp): output_type = rlz.shape_like('arg', dt.date) +@public +class DateFromYMD(ValueOp): + 
year = rlz.integer + month = rlz.integer + day = rlz.integer + output_type = rlz.shape_like('args', dt.date) + + +@public +class TimeFromHMS(ValueOp): + hours = rlz.integer + minutes = rlz.integer + seconds = rlz.integer + output_type = rlz.shape_like('args', dt.time) + + +@public +class TimestampFromYMDHMS(ValueOp): + year = rlz.integer + month = rlz.integer + day = rlz.integer + hours = rlz.integer + minutes = rlz.integer + seconds = rlz.integer + timezone = rlz.optional(rlz.string) + output_type = rlz.shape_like('args', dt.timestamp) + + @property + def arg(self): + return ( + self.year, + self.month, + self.day, + self.hours, + self.minutes, + self.seconds, + ) + + @public class TimestampFromUNIX(ValueOp): arg = rlz.any diff --git a/ibis/tests/expr/test_temporal.py b/ibis/tests/expr/test_temporal.py index daf0ec8628007..e451a3a32950c 100644 --- a/ibis/tests/expr/test_temporal.py +++ b/ibis/tests/expr/test_temporal.py @@ -705,3 +705,9 @@ def test_time_truncate(table, operand, unit): expr = operand(table).truncate(unit) assert isinstance(expr, ir.TimeValue) assert isinstance(expr.op(), ops.TimeTruncate) + + +def test_date_time_literals(): + ibis.date(2022, 2, 4) + ibis.time(16, 20, 00) + ibis.timestamp(2022, 2, 4, 16, 20, 00)