Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(api): add StringValue.as_time for parsing strings into times #10278

Merged
merged 1 commit into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions ibis/backends/polars/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -955,13 +955,17 @@ def string_to_date(op, **kw):
)


@translate.register(ops.StringToTime)
def string_to_time(op, **kw):
    """Translate ``ops.StringToTime`` into a Polars ``str.to_time`` call.

    ``op.arg`` is translated with the forwarded ``**kw``; ``op.format_str`` is
    resolved through ``_literal_value`` and passed as the ``format`` argument.
    """
    column = translate(op.arg, **kw)
    fmt = _literal_value(op.format_str)
    return column.str.to_time(format=fmt)


@translate.register(ops.StringToTimestamp)
def string_to_timestamp(op, **kw):
    """Translate ``ops.StringToTimestamp`` into a Polars ``str.strptime`` call.

    ``op.arg`` is translated with the forwarded ``**kw``; ``op.format_str`` is
    resolved through ``_literal_value`` and passed as the ``format`` argument.
    The string is parsed with ``dtype=pl.Datetime``.
    """
    arg = translate(op.arg, **kw)
    # Named ``fmt`` rather than ``format`` so the builtin is not shadowed.
    fmt = _literal_value(op.format_str)
    return arg.str.strptime(dtype=pl.Datetime, format=fmt)


@translate.register(ops.TimestampDiff)
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1042,6 +1042,9 @@ def visit_NotNull(self, op, *, arg):
def visit_InValues(self, op, *, value, options):
return value.isin(*options)

def visit_StringToTime(self, op, *, arg, format_str):
    """Compile ``StringToTime`` as ``time(str_to_time(arg, format_str))``.

    Both functions are built through ``self.f``; ``op`` itself is not used.
    """
    parsed = self.f.str_to_time(arg, format_str)
    return self.f.time(parsed)

### Counting

def visit_CountDistinct(self, op, *, arg, where):
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/clickhouse.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ class ClickHouseCompiler(SQLGlotCompiler):
ops.TimeDelta,
ops.StringToTimestamp,
ops.StringToDate,
ops.StringToTime,
ops.Levenshtein,
)

Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/datafusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class DataFusionCompiler(SQLGlotCompiler):
ops.TypeOf,
ops.StringToDate,
ops.StringToTimestamp,
ops.StringToTime,
)

SIMPLE_OPS = {
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/druid.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ class DruidCompiler(SQLGlotCompiler):
ops.StringAscii,
ops.StringSplit,
ops.StringToDate,
ops.StringToTime,
ops.StringToTimestamp,
ops.TimeDelta,
ops.TimestampBucket,
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/duckdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,5 +709,8 @@ def visit_TableUnnest(
.join(unnest, join_type="CROSS" if not keep_empty else "LEFT")
)

def visit_StringToTime(self, op, *, arg, format_str):
    """Compile ``StringToTime`` by casting ``str_to_time(arg, format_str)`` to ``dt.time``.

    The parse call is built through ``self.f`` and the cast through
    ``self.cast``; ``op`` itself is not used.
    """
    parsed = self.f.str_to_time(arg, format_str)
    return self.cast(parsed, to=dt.time)


compiler = DuckDBCompiler()
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/exasol.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class ExasolCompiler(SQLGlotCompiler):
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StringToTime,
ops.TimeDelta,
ops.TimestampAdd,
ops.TimestampBucket,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/flink.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ class FlinkCompiler(SQLGlotCompiler):
ops.RowID,
ops.StringSplit,
ops.Translate,
ops.StringToTime,
)

SIMPLE_OPS = {
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/impala.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ImpalaCompiler(SQLGlotCompiler):
ops.RegexSplit,
ops.RowID,
ops.StringSplit,
ops.StringToTime,
ops.StructColumn,
ops.Time,
ops.TimeDelta,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/mssql.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,7 @@ class MSSQLCompiler(SQLGlotCompiler):
ops.StringSplit,
ops.StringToDate,
ops.StringToTimestamp,
ops.StringToTime,
ops.StructColumn,
ops.TimestampDiff,
ops.Unnest,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class OracleCompiler(SQLGlotCompiler):
ops.ExtractDayOfYear,
ops.RowID,
ops.RandomUUID,
ops.StringToTime,
)

SIMPLE_OPS = {
Expand Down
3 changes: 3 additions & 0 deletions ibis/backends/sql/compilers/postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -827,5 +827,8 @@ def visit_ArrayAny(self, op, *, arg):
def visit_ArrayAll(self, op, *, arg):
return self._array_reduction(arg=arg, reduction="bool_and")

def visit_StringToTime(self, op, *, arg, format_str):
    """Compile ``StringToTime`` by casting ``str_to_time(arg, format_str)`` to ``dt.time``.

    The parse call is built through ``self.f`` and the cast through
    ``self.cast``; ``op`` itself is not used.
    """
    parsed = self.f.str_to_time(arg, format_str)
    return self.cast(parsed, to=dt.time)


compiler = PostgresCompiler()
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/pyspark.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class PySparkCompiler(SQLGlotCompiler):
ops.RowID,
ops.TimestampBucket,
ops.RandomUUID,
ops.StringToTime,
)

LOWERED_OPS = {
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class SQLiteCompiler(SQLGlotCompiler):
ops.TimestampDiff,
ops.StringToDate,
ops.StringToTimestamp,
ops.StringToTime,
ops.TimeDelta,
ops.TimestampDelta,
ops.TryCast,
Expand Down
1 change: 1 addition & 0 deletions ibis/backends/sql/compilers/trino.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ class TrinoCompiler(SQLGlotCompiler):
ops.Median,
ops.RowID,
ops.TimestampBucket,
ops.StringToTime,
)

LOWERED_OPS = {
Expand Down
35 changes: 33 additions & 2 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -1179,7 +1179,7 @@ def test_integer_to_timestamp(backend, con, unit):
raises=com.OperationNotDefinedError,
)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
def test_string_to_timestamp(alltypes, fmt):
def test_string_as_timestamp(alltypes, fmt):
table = alltypes
result = table.mutate(date=table.date_string_col.as_timestamp(fmt)).execute()

Expand Down Expand Up @@ -1250,7 +1250,7 @@ def test_string_to_timestamp(alltypes, fmt):
raises=com.OperationNotDefinedError,
)
@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError)
def test_string_to_date(alltypes, fmt):
def test_string_as_date(alltypes, fmt):
table = alltypes
result = table.mutate(date=table.date_string_col.as_date(fmt)).execute()

Expand All @@ -1260,6 +1260,37 @@ def test_string_to_date(alltypes, fmt):
assert val.strftime("%m/%d/%y") == result["date_string_col"][i]


@pytest.mark.notyet(
    [
        "pyspark",
        "exasol",
        "clickhouse",
        "impala",
        "mssql",
        "oracle",
        "trino",
        "druid",
        "datafusion",
        "flink",
    ],
    raises=com.OperationNotDefinedError,
)
@pytest.mark.notimpl(["sqlite"], raises=com.UnsupportedOperationError)
def test_string_as_time(backend, alltypes):
    """Round-trip a time through a string column and ``as_time``."""
    fmt = "%H:%M:%S"

    # Derive the input strings from ``timestamp_col``, truncated to whole
    # seconds, taking the time part and casting it to a string.
    time_strings = alltypes.timestamp_col.truncate("s").time().cast(str)
    table = alltypes.mutate(time_string_col=time_strings)

    parsed = table.time_string_col.as_time(fmt)
    result = table.mutate(time=parsed).execute()

    # The parsed time must equal the time component of the source timestamp,
    # floored to seconds on the result side.
    actual = result["time"]
    expected = result["timestamp_col"].dt.floor("s").dt.time.rename("time")
    backend.assert_series_equal(actual, expected)


@pytest.mark.parametrize(
("date", "expected_index", "expected_day"),
[
Expand Down
11 changes: 11 additions & 0 deletions ibis/expr/operations/temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,17 @@ class StringToDate(Value):
dtype = dt.date


@public
class StringToTime(Value):
    """Convert a string to a time.

    The result dtype is always ``dt.time``.
    """

    # String-typed value to be parsed.
    arg: Value[dt.String]
    # String-typed format specification used to parse ``arg``.
    format_str: Value[dt.String]

    # NOTE(review): presumably derives the output shape (scalar vs. column)
    # from the operation's arguments; confirm against ``rlz.shape_like``.
    shape = rlz.shape_like("args")
    dtype = dt.time


@public
class ExtractTemporalField(Unary):
"""Extract a field from a temporal value."""
Expand Down
29 changes: 29 additions & 0 deletions ibis/expr/types/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -1336,6 +1336,35 @@ def as_date(self, format_str: str) -> ir.DateValue:
def to_date(self, format_str: str) -> ir.DateValue:
return self.as_date(format_str=format_str)

def as_time(self, format_str: str) -> ir.TimeValue:
    """Parse this string as a time using a format string.

    Parameters
    ----------
    format_str
        Format string in `strptime` format

    Returns
    -------
    TimeValue
        Parsed time value

    Examples
    --------
    >>> import ibis
    >>> ibis.options.interactive = True
    >>> t = ibis.memtable({"ts": ["20:01:02"]})
    >>> t.ts.as_time("%H:%M:%S")
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
    ┃ StringToTime(ts, '%H:%M:%S') ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
    │ time                         │
    ├──────────────────────────────┤
    │ 20:01:02                     │
    └──────────────────────────────┘
    """
    node = ops.StringToTime(self, format_str)
    return node.to_expr()

def protocol(self):
"""Parse a URL and extract protocol.

Expand Down