Skip to content

Commit

Permalink
feat(datatype): add optional timestamp scale parameter
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jan 20, 2023
1 parent dec70f5 commit a38115a
Show file tree
Hide file tree
Showing 7 changed files with 135 additions and 36 deletions.
36 changes: 23 additions & 13 deletions ibis/backends/clickhouse/datatypes.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import functools
from functools import partial

import parsy

Expand All @@ -25,14 +26,30 @@ def _bool_type():


def parse(text: str) -> dt.DataType:
@parsy.generate
def datetime():
yield spaceless_string("datetime64", "datetime")
timezone = yield parened_string.optional()
return dt.Timestamp(timezone=timezone, nullable=False)
parened_string = LPAREN.then(RAW_STRING).skip(RPAREN)

datetime64_args = LPAREN.then(
parsy.seq(
scale=parsy.decimal_digit.map(int).optional(),
timezone=COMMA.then(RAW_STRING).optional(),
)
).skip(RPAREN)

datetime64 = spaceless_string("datetime64").then(
datetime64_args.optional(default={}).combine_dict(
partial(dt.Timestamp, nullable=False)
)
)

datetime = spaceless_string("datetime").then(
parsy.seq(timezone=parened_string.optional()).combine_dict(
partial(dt.Timestamp, nullable=False)
)
)

primitive = (
datetime
datetime64
| datetime
| spaceless_string("null", "nothing").result(dt.null)
| spaceless_string("bigint", "int64").result(dt.Int64(nullable=False))
| spaceless_string("double", "float64").result(dt.Float64(nullable=False))
Expand Down Expand Up @@ -67,13 +84,6 @@ def datetime():
).result(dt.String(nullable=False))
)

@parsy.generate
def parened_string():
yield LPAREN
s = yield RAW_STRING
yield RPAREN
return s

@parsy.generate
def nullable():
yield spaceless_string("nullable")
Expand Down
56 changes: 56 additions & 0 deletions ibis/backends/tests/test_temporal.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,3 +964,59 @@ def test_large_timestamp(con):
expr = ibis.timestamp("4567-01-01 00:00:00")
result = con.execute(expr)
assert result.replace(tzinfo=None) == huge_timestamp


@pytest.mark.parametrize(
    ("ts", "scale", "unit"),
    [
        param(
            "2023-01-07 13:20:05.561",
            3,
            "ms",
            id="ms",
            marks=pytest.mark.broken(["mssql"], reason="incorrect result"),
        ),
        param(
            "2023-01-07 13:20:05.561021",
            6,
            "us",
            id="us",
            marks=[
                pytest.mark.broken(["mssql"], reason="incorrect result"),
                pytest.mark.notyet(["sqlite"], reason="doesn't support microseconds"),
            ],
        ),
        param(
            "2023-01-07 13:20:05.561000231",
            9,
            "ns",
            id="ns",
            marks=[
                pytest.mark.broken(
                    [
                        "clickhouse",
                        "duckdb",
                        "impala",
                        "mssql",
                        "postgres",
                        "pyspark",
                        "sqlite",
                        "trino",
                    ],
                    reason="drivers appear to truncate nanos",
                ),
                pytest.mark.notyet(
                    ["bigquery"],
                    reason="bigquery doesn't support nanosecond timestamps",
                ),
            ],
        ),
    ],
)
@pytest.mark.notyet(["mysql"])
def test_timestamp_precision_output(con, ts, scale, unit):
    """Cast a timestamp string to a scaled timestamp type and check the result.

    The executed value must equal the pandas timestamp built from the same
    string, floored to ``unit`` (the pandas unit paired with ``scale`` in
    the parametrization above).
    """
    scaled_type = dt.Timestamp(scale=scale)
    casted = ibis.literal(ts).cast(scaled_type)
    result = con.execute(casted)
    # Floor the reference value so both sides carry the same precision.
    assert result == pd.Timestamp(ts).floor(unit)
24 changes: 16 additions & 8 deletions ibis/backends/trino/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@
import parsy as p
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from trino.sqlalchemy.datatype import DOUBLE, JSON, MAP, ROW
from trino.sqlalchemy.datatype import DOUBLE, JSON, MAP, ROW, TIMESTAMP
from trino.sqlalchemy.dialect import TrinoDialect

import ibis.expr.datatypes as dt
from ibis.backends.base.sql.alchemy import to_sqla_type
from ibis.common.parsing import (
COMMA,
FIELD,
LANGLE,
LPAREN,
PRECISION,
RANGLE,
RPAREN,
SCALE,
SINGLE_DIGIT,
spaceless,
spaceless_string,
)
Expand Down Expand Up @@ -51,7 +50,7 @@ def parse(text: str, default_decimal_parameters=(18, 3)) -> DataType:
@p.generate
def timestamp():
    """Parse ``timestamp`` with an optional single-digit precision.

    Accepts ``timestamp`` and ``timestamp(<digit>)``. The parsed digit is
    kept and forwarded as the ``scale`` of the resulting type rather than
    being thrown away; when no precision is given the scale is ``None``.
    The timezone is always reported as ``"UTC"``.
    """
    yield spaceless_string("timestamp")
    # ``optional()`` yields None when the parenthesized precision is absent.
    scale = yield LPAREN.then(SINGLE_DIGIT.map(int)).skip(RPAREN).optional()
    return Timestamp(timezone="UTC", scale=scale)

primitive = (
Expand Down Expand Up @@ -89,9 +88,9 @@ def decimal():

@p.generate
def angle_type():
yield LANGLE
yield LPAREN
value_type = yield ty
yield RANGLE
yield RPAREN
return value_type

@p.generate
Expand All @@ -103,11 +102,11 @@ def array():
@p.generate
def map():
yield spaceless_string("map")
yield LANGLE
yield LPAREN
key_type = yield primitive
yield COMMA
value_type = yield ty
yield RANGLE
yield RPAREN
return Map(key_type, value_type)

field = spaceless(FIELD)
Expand Down Expand Up @@ -152,6 +151,15 @@ def sa_trino_map(dialect, satype, nullable=True):
)


@dt.dtype.register(TrinoDialect, TIMESTAMP)
def sa_trino_timestamp(_, satype, nullable=True):
    """Convert a Trino SQLAlchemy ``TIMESTAMP`` type into ``dt.Timestamp``.

    A truthy ``satype.timezone`` maps to the ``"UTC"`` timezone, otherwise
    the result has no timezone. ``satype.precision`` is passed through as
    the scale.
    """
    tz = "UTC" if satype.timezone else None
    # NOTE(review): presumably ``satype.precision`` can be None or larger than
    # the scale values ``dt.Timestamp`` accepts -- confirm against the dialect.
    precision = satype.precision
    return dt.Timestamp(timezone=tz, scale=precision, nullable=nullable)


@dt.dtype.register(TrinoDialect, JSON)
def sa_trino_json(_, satype, nullable=True):
    """Convert a Trino SQLAlchemy ``JSON`` type into ``dt.JSON``.

    Only ``nullable`` influences the result; the dialect and the SQLAlchemy
    type instance are not inspected.
    """
    json_dtype = dt.JSON(nullable=nullable)
    return json_dtype
Expand Down
1 change: 1 addition & 0 deletions ibis/common/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def spaceless_string(*strings: str):


RAW_NUMBER = parsy.decimal_digit.at_least(1).concat()
SINGLE_DIGIT = parsy.decimal_digit
PRECISION = SCALE = NUMBER = RAW_NUMBER.map(int)

LPAREN = spaceless_string("(")
Expand Down
10 changes: 7 additions & 3 deletions ibis/expr/datatypes/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,14 +344,18 @@ class Timestamp(Temporal):
timezone = optional(instance_of(str))
"""The timezone of values of this type."""

scale = optional(isin(range(10)))
"""The scale of the timestamp if known."""

scalar = ir.TimestampScalar
column = ir.TimestampColumn

@property
def _pretty_piece(self) -> str:
    """Return the parenthesized parameter suffix for this type's string form.

    Parameters that are ``None`` are omitted. The timezone is rendered
    before the scale so the output follows the ``timestamp('UTC', 3)``
    argument order (timezone first, then scale); rendering the scale first
    would produce text that does not parse back in that order.

    Returns
    -------
    str
        ``"('UTC', 3)"``, ``"('UTC')"``, ``"(3)"``, or ``""`` when neither
        parameter is set.
    """
    pieces = [
        repr(piece) for piece in (self.timezone, self.scale) if piece is not None
    ]
    return f"({', '.join(pieces)})" if pieces else ""


@public
Expand Down
29 changes: 18 additions & 11 deletions ibis/expr/datatypes/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
RPAREN,
SCALE,
SEMICOLON,
SINGLE_DIGIT,
spaceless,
spaceless_string,
)
Expand Down Expand Up @@ -147,18 +148,24 @@ def decimal():
) or (None, None)
return dt.Decimal(precision=precision, scale=scale)

@parsy.generate
def parened_string():
yield LPAREN
s = yield RAW_STRING
yield RPAREN
return s
parened_string = LPAREN.then(RAW_STRING).skip(RPAREN)
timestamp_scale = SINGLE_DIGIT.map(int)

@parsy.generate
def timestamp():
yield spaceless_string("timestamp")
tz = yield parened_string
return dt.Timestamp(tz)
timestamp_tz_args = (
LPAREN.then(
parsy.seq(timezone=RAW_STRING, scale=COMMA.then(timestamp_scale).optional())
)
.skip(RPAREN)
.combine_dict(dict)
)

timestamp_no_tz_args = LPAREN.then(parsy.seq(scale=timestamp_scale).skip(RPAREN))

timestamp = spaceless_string("timestamp").then(
parsy.alt(timestamp_tz_args, timestamp_no_tz_args)
.optional(default={})
.combine_dict(dt.Timestamp)
)

@parsy.generate
def angle_type():
Expand Down
15 changes: 14 additions & 1 deletion ibis/tests/expr/test_datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ def test_timestamp_with_invalid_timezone():

def test_timestamp_with_timezone_repr():
ts = dt.Timestamp('UTC')
assert repr(ts) == "Timestamp(timezone='UTC', nullable=True)"
assert repr(ts) == "Timestamp(timezone='UTC', scale=None, nullable=True)"


def test_timestamp_with_timezone_str():
Expand Down Expand Up @@ -532,6 +532,19 @@ def test_parse_null():
assert dt.parse("null") == dt.null


@pytest.mark.parametrize("scale", range(10))
@pytest.mark.parametrize("tz", ["UTC", "America/New_York"])
def test_timestamp_with_scale(scale, tz):
    """Parsing ``timestamp(<tz>, <scale>)`` yields a type with both fields set."""
    type_text = f"timestamp({tz!r}, {scale:d})"
    expected = dt.Timestamp(timezone=tz, scale=scale)
    assert dt.parse(type_text) == expected


@pytest.mark.parametrize("scale", range(10))
def test_timestamp_with_scale_no_tz(scale):
    """Parsing ``timestamp(<scale>)`` yields a type with only the scale set."""
    type_text = f"timestamp({scale:d})"
    expected = dt.Timestamp(scale=scale)
    assert dt.parse(type_text) == expected


def get_leaf_classes(op):
for child_class in op.__subclasses__():
yield child_class
Expand Down

0 comments on commit a38115a

Please sign in to comment.