From 8b0fb66795547b42998cceb485c525cb6e3a9a21 Mon Sep 17 00:00:00 2001 From: Phillip Cloud <417981+cpcloud@users.noreply.github.com> Date: Mon, 26 Aug 2024 10:01:58 -0400 Subject: [PATCH] feat(api): add `DateValue.epoch_days` api for computing days since epoch (#9856) Co-authored-by: Jim Crist-Harif --- ibis/backends/polars/compiler.py | 9 +++++ ibis/backends/sql/compilers/impala.py | 12 +++++- ibis/backends/sql/compilers/oracle.py | 18 ++++++++- ibis/backends/sql/compilers/risingwave.py | 1 - ibis/backends/sql/compilers/sqlite.py | 12 +++++- ibis/backends/sqlite/udf.py | 7 ++++ ibis/backends/tests/test_temporal.py | 46 +++++++++++------------ ibis/expr/types/temporal.py | 36 ++++++++++++++++++ 8 files changed, 114 insertions(+), 27 deletions(-) diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index ac24fbacfd5e..58227f039762 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -1442,3 +1442,12 @@ def execute_group_concat(op, **kw): arg = arg.sort_by(keys, descending=descending) return pl.when(arg.count() > 0).then(arg.str.join(sep)).otherwise(None) + + +@translate.register(ops.DateDelta) +def execute_date_delta(op, **kw): + left = translate(op.left, **kw) + right = translate(op.right, **kw) + delta = left - right + method_name = f"total_{_literal_value(op.part)}s" + return getattr(delta.dt, method_name)() diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 486ad813a0fa..e7cff88a9a8e 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -29,7 +29,6 @@ class ImpalaCompiler(SQLGlotCompiler): ops.ArrayPosition, ops.Array, ops.Covariance, - ops.DateDelta, ops.ExtractDayOfYear, ops.Levenshtein, ops.Map, @@ -314,5 +313,16 @@ def visit_Sign(self, op, *, arg): return self.cast(sign, dtype) return sign + def visit_DateDelta(self, op, *, left, right, part): + if not isinstance(part, sge.Literal): + raise
com.UnsupportedOperationError( + "Only literal `part` values are supported for date delta" + ) + if part.this != "day": + raise com.UnsupportedOperationError( + f"Only 'day' part is supported for date delta in the {self.dialect} backend" + ) + return self.f.datediff(left, right) + compiler = ImpalaCompiler() diff --git a/ibis/backends/sql/compilers/oracle.py b/ibis/backends/sql/compilers/oracle.py index 5676d18611d9..20e74766e73e 100644 --- a/ibis/backends/sql/compilers/oracle.py +++ b/ibis/backends/sql/compilers/oracle.py @@ -63,7 +63,6 @@ class OracleCompiler(SQLGlotCompiler): ops.Bucket, ops.TimestampBucket, ops.TimeDelta, - ops.DateDelta, ops.TimestampDelta, ops.TimestampFromYMDHMS, ops.TimeFromHMS, @@ -474,5 +473,22 @@ def visit_GroupConcat(self, op, *, arg, where, sep, order_by): def visit_IntervalFromInteger(self, op, *, arg, unit): return self._value_to_interval(arg, unit) + def visit_DateFromYMD(self, op, *, year, month, day): + year = self.f.lpad(year, 4, "0") + month = self.f.lpad(month, 2, "0") + day = self.f.lpad(day, 2, "0") + return self.f.to_date(self.f.concat(year, month, day), "FXYYYYMMDD") + + def visit_DateDelta(self, op, *, left, right, part): + if not isinstance(part, sge.Literal): + raise com.UnsupportedOperationError( + "Only literal `part` values are supported for date delta" + ) + if part.this != "day": + raise com.UnsupportedOperationError( + f"Only 'day' part is supported for date delta in the {self.dialect} backend" + ) + return left - right + compiler = OracleCompiler() diff --git a/ibis/backends/sql/compilers/risingwave.py b/ibis/backends/sql/compilers/risingwave.py index 0091cb5408c6..ba2cad88b2c2 100644 --- a/ibis/backends/sql/compilers/risingwave.py +++ b/ibis/backends/sql/compilers/risingwave.py @@ -19,7 +19,6 @@ class RisingWaveCompiler(PostgresCompiler): UNSUPPORTED_OPS = ( ops.Arbitrary, - ops.DateFromYMD, ops.Mode, ops.RandomUUID, ops.MultiQuantile, diff --git a/ibis/backends/sql/compilers/sqlite.py 
b/ibis/backends/sql/compilers/sqlite.py index 8d49c9177808..304438d81b09 100644 --- a/ibis/backends/sql/compilers/sqlite.py +++ b/ibis/backends/sql/compilers/sqlite.py @@ -59,7 +59,6 @@ class SQLiteCompiler(SQLGlotCompiler): ops.StringToDate, ops.StringToTimestamp, ops.TimeDelta, - ops.DateDelta, ops.TimestampDelta, ops.TryCast, ) @@ -531,5 +530,16 @@ def visit_NonNullLiteral(self, op, *, value, dtype): raise com.UnsupportedBackendType(f"Unsupported type: {dtype!r}") return super().visit_NonNullLiteral(op, value=value, dtype=dtype) + def visit_DateDelta(self, op, *, left, right, part): + if not isinstance(part, sge.Literal): + raise com.UnsupportedOperationError( + "Only literal `part` values are supported for date delta" + ) + if part.this != "day": + raise com.UnsupportedOperationError( + f"Only 'day' part is supported for date delta in the {self.dialect} backend" + ) + return self.f._ibis_date_delta(left, right) + compiler = SQLiteCompiler() diff --git a/ibis/backends/sqlite/udf.py b/ibis/backends/sqlite/udf.py index 9e3fdff258b3..afaf1490ea19 100644 --- a/ibis/backends/sqlite/udf.py +++ b/ibis/backends/sqlite/udf.py @@ -5,6 +5,7 @@ import math import operator from collections import defaultdict +from datetime import date from typing import TYPE_CHECKING, Any, NamedTuple from urllib.parse import parse_qs, urlsplit from uuid import uuid4 @@ -357,6 +358,12 @@ def _ibis_extract_user_info(url): return f"{username}:{password}" +@udf +def _ibis_date_delta(left, right): + delta = date.fromisoformat(left) - date.fromisoformat(right) + return delta.days + + class _ibis_var: def __init__(self, offset): self.mean = 0.0 diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 65e099300045..1c9ba8f027eb 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -1474,11 +1474,7 @@ def test_today_from_projection(alltypes): @pytest.mark.notimpl( - ["pandas", "dask", "exasol", "risingwave", "druid"], - 
raises=com.OperationNotDefinedError, -) -@pytest.mark.notimpl( - ["oracle"], raises=OracleDatabaseError, reason="ORA-00936 missing expression" + ["pandas", "dask", "exasol", "druid"], raises=com.OperationNotDefinedError ) def test_date_literal(con, backend): expr = ibis.date(2022, 2, 4) @@ -1709,11 +1705,7 @@ def test_interval_literal(con, backend): @pytest.mark.notimpl( - ["pandas", "dask", "exasol", "risingwave", "druid"], - raises=com.OperationNotDefinedError, -) -@pytest.mark.notimpl( - ["oracle"], raises=OracleDatabaseError, reason="ORA-00936: missing expression" + ["pandas", "dask", "exasol", "druid"], raises=com.OperationNotDefinedError ) def test_date_column_from_ymd(backend, con, alltypes, df): c = alltypes.timestamp_col @@ -1975,16 +1967,7 @@ def test_timestamp_precision_output(con, ts, scale, unit): @pytest.mark.notimpl( - [ - "dask", - "datafusion", - "druid", - "impala", - "oracle", - "pandas", - "polars", - ], - raises=com.OperationNotDefinedError, + ["dask", "datafusion", "druid", "pandas"], raises=com.OperationNotDefinedError ) @pytest.mark.parametrize( ("start", "end", "unit", "expected"), @@ -2006,7 +1989,10 @@ def test_timestamp_precision_output(con, ts, scale, unit): reason="postgres doesn't have any easy way to accurately compute the delta in specific units", raises=com.OperationNotDefinedError, ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl( + ["exasol", "polars", "sqlite", "oracle", "impala"], + raises=com.OperationNotDefinedError, + ), ], ), param(ibis.date("1992-09-30"), ibis.date("1992-10-01"), "day", 1, id="date"), @@ -2027,12 +2013,14 @@ def test_timestamp_precision_output(con, ts, scale, unit): raises=com.OperationNotDefinedError, reason="timestampdiff rounds after subtraction and mysql doesn't have a date_trunc function", ), - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + pytest.mark.notimpl( + ["exasol", "polars", "sqlite", "oracle", "impala"], + 
raises=com.OperationNotDefinedError, + ), ], ), ], ) -@pytest.mark.notimpl(["sqlite"], raises=com.OperationNotDefinedError) def test_delta(con, start, end, unit, expected): expr = end.delta(start, unit) assert con.execute(expr) == expected @@ -2297,3 +2285,15 @@ def test_date_scalar(con, value, func): assert isinstance(result, datetime.date) assert result == datetime.date.fromisoformat(value) + + +@pytest.mark.notyet( + ["dask", "datafusion", "pandas", "druid", "exasol"], + raises=com.OperationNotDefinedError, +) +def test_simple_unix_date_offset(con): + d = ibis.date("2023-04-07") + expr = d.epoch_days() + result = con.execute(expr) + delta = datetime.date(2023, 4, 7) - datetime.date(1970, 1, 1) + assert result == delta.days diff --git a/ibis/expr/types/temporal.py b/ibis/expr/types/temporal.py index 0d90aa4ff500..9e238b1af7e9 100644 --- a/ibis/expr/types/temporal.py +++ b/ibis/expr/types/temporal.py @@ -474,6 +474,42 @@ def delta( """ return ops.DateDelta(left=self, right=other, part=part).to_expr() + def epoch_days(self) -> ir.IntegerValue: + """Return the number of days since the UNIX epoch date. + + Examples + -------- + >>> import ibis + >>> ibis.options.interactive = True + >>> date = ibis.date(2020, 1, 1) + >>> date + ┌────────────┐ + │ 2020-01-01 │ + └────────────┘ + >>> date.epoch_days() + ┌───────┐ + │ 18262 │ + └───────┘ + >>> t = date.name("date_col").as_table() + >>> t + ┏━━━━━━━━━━━━┓ + ┃ date_col ┃ + ┡━━━━━━━━━━━━┩ + │ date │ + ├────────────┤ + │ 2020-01-01 │ + └────────────┘ + >>> t.mutate(epoch=t.date_col.epoch_days()) + ┏━━━━━━━━━━━━┳━━━━━━━┓ + ┃ date_col ┃ epoch ┃ + ┡━━━━━━━━━━━━╇━━━━━━━┩ + │ date │ int64 │ + ├────────────┼───────┤ + │ 2020-01-01 │ 18262 │ + └────────────┴───────┘ + """ + return self.delta(ibis.date(1970, 1, 1), "day") + @public class DateScalar(Scalar, DateValue):