Skip to content

Commit

Permalink
feat: add to_date function to StringValue (#9030)
Browse files Browse the repository at this point in the history
## Description of changes

* Adds a `to_date` method to string expressions. It accepts a format string
and parses the string into a Date value.

* As a fallback, parses the string to a timestamp and then extracts the date from it, like this:
```python
def visit_StringToDate(self, op, *, arg, format_str):
    """Fallback compilation: parse ``arg`` to a timestamp, then take its date."""
    # Step 1: build the string-to-timestamp call from the argument and format.
    as_timestamp = self.f.str_to_time(arg, format_str)
    # Step 2: wrap that call in `date(...)`.
    return self.f.date(as_timestamp)
```
* Implements native functions for bigquery, clickhouse, MySQL, oracle,
postgres, and snowflake

## Issues closed
Implements half of #8908

---------

Co-authored-by: Gil Forsyth <gforsyth@users.noreply.github.com>
  • Loading branch information
saschahofmann and gforsyth authored Apr 24, 2024
1 parent 9355281 commit 0701978
Show file tree
Hide file tree
Showing 13 changed files with 135 additions and 1 deletion.
1 change: 1 addition & 0 deletions ibis/backends/clickhouse/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class ClickHouseCompiler(SQLGlotCompiler):
ops.Time,
ops.TimeDelta,
ops.StringToTimestamp,
ops.StringToDate,
ops.Levenshtein,
)
)
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/datafusion/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class DataFusionCompiler(SQLGlotCompiler):
ops.TimestampNow,
ops.TypeOf,
ops.Unnest,
ops.StringToDate,
ops.StringToTimestamp,
)
)
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/druid/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class DruidCompiler(SQLGlotCompiler):
ops.Strftime,
ops.StringAscii,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.TimestampBucket,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/exasol/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class ExasolCompiler(SQLGlotCompiler):
ops.Strftime,
ops.StringJoin,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.TimestampAdd,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/flink/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class FlinkCompiler(SQLGlotCompiler):
ops.RegexSearch: "regexp",
ops.StrRight: "right",
ops.StringLength: "char_length",
ops.StringToDate: "to_date",
ops.StringToTimestamp: "to_timestamp",
ops.Strip: "trim",
ops.TypeOf: "typeof",
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/mssql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class MSSQLCompiler(SQLGlotCompiler):
ops.RPad,
ops.StartsWith,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimestampAdd,
Expand Down
9 changes: 9 additions & 0 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,15 @@ def interval_from_integer(op, **kw):
return _make_duration(arg, dt.Interval(unit=op.unit))


@translate.register(ops.StringToDate)
def string_to_date(op, **kw):
    """Translate ``ops.StringToDate`` into a ``str.strptime`` call yielding ``pl.Date``.

    The string argument is translated first; the format is then read from
    ``op.format_str`` via ``_literal_value`` and passed as ``format``.
    """
    column = translate(op.arg, **kw)
    fmt = _literal_value(op.format_str)
    return column.str.strptime(dtype=pl.Date, format=fmt)


@translate.register(ops.StringToTimestamp)
def string_to_timestamp(op, **kw):
arg = translate(op.arg, **kw)
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/snowflake/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class SnowflakeCompiler(SQLGlotCompiler):
ops.Hash: "hash",
ops.Median: "median",
ops.Mode: "mode",
ops.StringToDate: "to_date",
ops.StringToTimestamp: "to_timestamp_tz",
ops.TimeFromHMS: "time_from_parts",
ops.TimestampFromYMDHMS: "timestamp_from_parts",
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ class SQLGlotCompiler(abc.ABC):
ops.StringLength: "length",
ops.StringReplace: "replace",
ops.StringSplit: "split",
ops.StringToDate: "str_to_date",
ops.StringToTimestamp: "str_to_time",
ops.Tan: "tan",
ops.Translate: "translate",
Expand Down Expand Up @@ -801,7 +802,6 @@ def visit_IntervalFromInteger(self, op, *, arg, unit):
)

### String Instruments

def visit_Strip(self, op, *, arg):
    """Build a ``trim`` call over ``arg`` using ``string.whitespace`` as the character set."""
    strip_chars = string.whitespace
    return self.f.trim(arg, strip_chars)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.TimestampAdd,
ops.TimestampSub,
ops.TimestampDiff,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.DateDelta,
Expand Down
79 changes: 79 additions & 0 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,85 @@ def test_string_to_timestamp(alltypes, fmt):
assert val.strftime("%m/%d/%y") == result["date_string_col"][i]


@pytest.mark.parametrize(
    "fmt",
    [
        # "11/01/10" - "month/day/year"
        param(
            "%m/%d/%y",
            id="mysql_format",
            marks=[
                pytest.mark.never(
                    ["snowflake"],
                    reason=(
                        "(snowflake.connector.errors.ProgrammingError) 100096 (22007): "
                        "Can't parse '11/01/10' as timestamp with format '%m/%d/%y'"
                    ),
                    raises=SnowflakeProgrammingError,
                ),
                pytest.mark.never(
                    ["flink"],
                    raises=ValueError,
                    reason="Datetime formatting style is not supported.",
                ),
            ],
        ),
        param(
            "MM/dd/yy",
            id="pyspark_format",
            marks=[
                pytest.mark.never(
                    ["bigquery"],
                    reason="400 Mismatch between format character 'M' and string character '0'",
                    raises=GoogleBadRequest,
                ),
                pytest.mark.never(
                    ["mysql"],
                    reason="NaTType does not support strftime",
                    raises=ValueError,
                ),
                pytest.mark.never(
                    ["trino"],
                    reason="datetime formatting style not supported",
                    raises=TrinoUserError,
                ),
                pytest.mark.never(
                    ["polars"],
                    reason="datetime formatting style not supported",
                    raises=PolarsComputeError,
                ),
                pytest.mark.never(
                    ["duckdb"],
                    reason="datetime formatting style not supported",
                    raises=DuckDBInvalidInputException,
                ),
            ],
        ),
    ],
)
@pytest.mark.notimpl(
    [
        "dask",
        "pandas",
        "clickhouse",
        "sqlite",
        "datafusion",
        "mssql",
        "druid",
    ],
    raises=com.OperationNotDefinedError,
)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
def test_string_to_date(alltypes, fmt):
    """Round-trip check: parse ``date_string_col`` with ``fmt`` and format it back.

    Each parsed value, re-formatted as ``"%m/%d/%y"``, must equal the string
    it was parsed from.
    """
    parsed = alltypes.date_string_col.to_date(fmt)
    result = alltypes.mutate(date=parsed).execute()
    source_strings = result["date_string_col"]

    # The comparison format is fixed at "%m/%d/%y" whatever `fmt` was used for
    # parsing; it relies on the executed values exposing `strftime`.
    for i, val in enumerate(result["date"]):
        assert val.strftime("%m/%d/%y") == source_strings[i]


@pytest.mark.parametrize(
("date", "expected_index", "expected_day"),
[
Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/operations/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ class StringToTimestamp(Value):
dtype = dt.Timestamp(timezone="UTC")


@public
class StringToDate(Value):
    """Operation that parses a string value into a date using a format string."""

    # Inputs: the string to parse and the format string describing it.
    arg: Value[dt.String]
    format_str: Value[dt.String]

    # The result is a date whose shape follows that of `arg`.
    dtype = dt.date
    shape = rlz.shape_like("arg")


@public
class ExtractTemporalField(TemporalUnary):
dtype = dt.int32
Expand Down
29 changes: 29 additions & 0 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,35 @@ def to_timestamp(self, format_str: str) -> ir.TimestampValue:
"""
return ops.StringToTimestamp(self, format_str).to_expr()

def to_date(self, format_str: str) -> ir.DateValue:
    """Parse a string and return a date.

    Parameters
    ----------
    format_str
        Format string in `strptime` format

    Returns
    -------
    DateValue
        Parsed date value

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"ts": ["20170206"]})
    >>> t.ts.to_date("%Y%m%d")
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ StringToDate(ts, '%Y%m%d') ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ date                       │
    ├────────────────────────────┤
    │ 2017-02-06                 │
    └────────────────────────────┘
    """
    # Build the operation node first, then wrap it as an expression.
    node = ops.StringToDate(self, format_str)
    return node.to_expr()

def protocol(self):
"""Parse a URL and extract protocol.
Expand Down

0 comments on commit 0701978

Please sign in to comment.