Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add to_date function to StringValue #9030

Merged
merged 15 commits into from
Apr 24, 2024
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ibis/backends/clickhouse/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ class ClickHouseCompiler(SQLGlotCompiler):
ops.Time,
ops.TimeDelta,
ops.StringToTimestamp,
ops.StringToDate,
ops.Levenshtein,
)
)
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/datafusion/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ class DataFusionCompiler(SQLGlotCompiler):
ops.TimestampNow,
ops.TypeOf,
ops.Unnest,
ops.StringToDate,
ops.StringToTimestamp,
)
)
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/druid/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ class DruidCompiler(SQLGlotCompiler):
ops.Strftime,
ops.StringAscii,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.TimestampBucket,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/exasol/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class ExasolCompiler(SQLGlotCompiler):
ops.Strftime,
ops.StringJoin,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.TimestampAdd,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/flink/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ class FlinkCompiler(SQLGlotCompiler):
ops.RegexSearch: "regexp",
ops.StrRight: "right",
ops.StringLength: "char_length",
ops.StringToDate: "to_date",
ops.StringToTimestamp: "to_timestamp",
ops.Strip: "trim",
ops.TypeOf: "typeof",
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/mssql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class MSSQLCompiler(SQLGlotCompiler):
ops.RPad,
ops.StartsWith,
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StructColumn,
ops.TimestampAdd,
Expand Down
9 changes: 9 additions & 0 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,6 +936,15 @@ def interval_from_integer(op, **kw):
return _make_duration(arg, dt.Interval(unit=op.unit))


@translate.register(ops.StringToDate)
def string_to_date(op, **kw):
    """Translate an ``ops.StringToDate`` node into a polars expression.

    The string argument is translated first, then parsed with
    ``str.strptime`` into a ``pl.Date`` using the node's format string.

    Parameters
    ----------
    op
        The ``ops.StringToDate`` node being translated.
    **kw
        Forwarded unchanged to ``translate`` for the string argument.

    Returns
    -------
    The result of ``<translated arg>.str.strptime(...)``.
    """
    string_expr = translate(op.arg, **kw)
    # NOTE(review): presumably unwraps the literal node into a plain Python
    # string -- confirm against `_literal_value`.
    date_format = _literal_value(op.format_str)
    return string_expr.str.strptime(dtype=pl.Date, format=date_format)


@translate.register(ops.StringToTimestamp)
def string_to_timestamp(op, **kw):
arg = translate(op.arg, **kw)
Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/sql/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,7 @@ class SQLGlotCompiler(abc.ABC):
ops.StringLength: "length",
ops.StringReplace: "replace",
ops.StringSplit: "split",
ops.StringToDate: "str_to_date",
ops.StringToTimestamp: "str_to_time",
ops.Tan: "tan",
ops.Translate: "translate",
Expand Down Expand Up @@ -801,7 +802,6 @@ def visit_IntervalFromInteger(self, op, *, arg, unit):
)

### String Instruments

def visit_Strip(self, op, *, arg):
    """Build a ``trim`` call that strips ``string.whitespace`` from ``arg``.

    ``op`` is accepted for interface compatibility and is not used here.
    """
    trim = self.f.trim
    whitespace_chars = string.whitespace
    return trim(arg, whitespace_chars)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sqlite/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.TimestampAdd,
ops.TimestampSub,
ops.TimestampDiff,
ops.StringToDate,
ops.StringToTimestamp,
ops.TimeDelta,
ops.DateDelta,
Expand Down
79 changes: 79 additions & 0 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1441,6 +1441,85 @@ def test_string_to_timestamp(alltypes, fmt):
assert val.strftime("%m/%d/%y") == result["date_string_col"][i]


@pytest.mark.parametrize(
    "fmt",
    [
        # "11/01/10" - "month/day/year"
        param(
            "%m/%d/%y",
            id="mysql_format",
            marks=[
                pytest.mark.never(
                    ["snowflake"],
                    reason=(
                        "(snowflake.connector.errors.ProgrammingError) 100096 (22007): "
                        "Can't parse '11/01/10' as timestamp with format '%m/%d/%y'"
                    ),
                    raises=SnowflakeProgrammingError,
                ),
                pytest.mark.never(
                    ["flink"],
                    raises=ValueError,
                    reason="Datetime formatting style is not supported.",
                ),
            ],
        ),
        param(
            "MM/dd/yy",
            id="pyspark_format",
            marks=[
                pytest.mark.never(
                    ["bigquery"],
                    reason="400 Mismatch between format character 'M' and string character '0'",
                    raises=GoogleBadRequest,
                ),
                pytest.mark.never(
                    ["mysql"],
                    reason="NaTType does not support strftime",
                    raises=ValueError,
                ),
                pytest.mark.never(
                    ["trino"],
                    reason="datetime formatting style not supported",
                    raises=TrinoUserError,
                ),
                pytest.mark.never(
                    ["polars"],
                    reason="datetime formatting style not supported",
                    raises=PolarsComputeError,
                ),
                pytest.mark.never(
                    ["duckdb"],
                    reason="datetime formatting style not supported",
                    raises=DuckDBInvalidInputException,
                ),
            ],
        ),
    ],
)
@pytest.mark.notimpl(
    [
        "dask",
        "pandas",
        "clickhouse",
        "sqlite",
        "datafusion",
        "mssql",
        "druid",
    ],
    raises=com.OperationNotDefinedError,
)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
def test_string_to_date(alltypes, fmt):
    """Round-trip check: parse ``date_string_col`` with ``to_date`` and re-format it."""
    expr = alltypes.mutate(date=alltypes.date_string_col.to_date(fmt))
    df = expr.execute()

    # Every parsed date, rendered back as month/day/year, must equal the
    # string it was parsed from. The rendering format is fixed because the
    # executed result is formatted with pandas' strftime, independent of `fmt`.
    source_strings = df["date_string_col"]
    for row, parsed in enumerate(df["date"]):
        assert parsed.strftime("%m/%d/%y") == source_strings[row]


@pytest.mark.parametrize(
("date", "expected_index", "expected_day"),
[
Expand Down
9 changes: 9 additions & 0 deletions ibis/expr/operations/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ class StringToTimestamp(Value):
dtype = dt.Timestamp(timezone="UTC")


@public
class StringToDate(Value):
    """Operation node for parsing a string value into a date.

    ``arg`` is the string value to parse and ``format_str`` is the string
    value holding the format used to parse it.
    """

    arg: Value[dt.String]
    format_str: Value[dt.String]

    # The result type is always a date; its shape is derived from ``arg``.
    dtype = dt.date
    shape = rlz.shape_like("arg")


@public
class ExtractTemporalField(TemporalUnary):
dtype = dt.int32
Expand Down
29 changes: 29 additions & 0 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1302,6 +1302,35 @@ def to_timestamp(self, format_str: str) -> ir.TimestampValue:
"""
return ops.StringToTimestamp(self, format_str).to_expr()

def to_date(self, format_str: str) -> ir.DateValue:
    """Parse a string and return a date.

    Parameters
    ----------
    format_str
        Format string in `strptime` format

    Returns
    -------
    DateValue
        Parsed date value

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"ts": ["20170206"]})
    >>> t.ts.to_date("%Y%m%d")
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ StringToDate(ts, '%Y%m%d') ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ date                       │
    ├────────────────────────────┤
    │ 2017-02-06                 │
    └────────────────────────────┘
    """
    return ops.StringToDate(self, format_str).to_expr()

def protocol(self):
"""Parse a URL and extract protocol.

Expand Down
Loading