Skip to content

Commit

Permalink
feat: make dayofweek impls conform to pandas semantics (#3161)
Browse files Browse the repository at this point in the history
* feat: make dayofweek impls conform to pandas semantics

* chore: implement custom day of week name for clickhouse

* test: fix test_string tests that XPASS for clickhouse

* ci: make the clickhouse container a bit lighter

* docs: add commentary about broken ClickHouse features

* docs: more clickhouse clarification
  • Loading branch information
cpcloud authored Dec 16, 2021
1 parent 9a46fc8 commit 9297828
Show file tree
Hide file tree
Showing 7 changed files with 89 additions and 31 deletions.
3 changes: 2 additions & 1 deletion .github/workflows/ibis-backends.yml
Original file line number Diff line number Diff line change
Expand Up @@ -382,10 +382,11 @@ jobs:
MYSQL_PASSWORD: ibis
options: --health-cmd="mysqladmin ping" --health-interval 10s --health-timeout 5s --health-retries 3
clickhouse:
image: yandex/clickhouse-server:18.14
image: yandex/clickhouse-server:20-alpine
ports:
- 8123:8123
- 9000:9000
options: --ulimit nofile=262144:262144
steps:
- name: checkout
uses: actions/checkout@v2
Expand Down
28 changes: 28 additions & 0 deletions ibis/backends/clickhouse/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,34 @@ def _floor_divide(expr):
return left.div(right).floor()


@rewrites(ops.DayOfWeekName)
def day_of_week_name(expr):
# ClickHouse 20 doesn't support dateName
#
# ClickHouse 21 supports dateName is broken for regexen:
# https://github.com/ClickHouse/ClickHouse/issues/32777
#
# ClickHouses 20 and 21 also have a broken case statement hence the ifnull:
# https://github.com/ClickHouse/ClickHouse/issues/32849
#
# We test against 20 in CI, so we implement day_of_week_name as follows
return (
expr.op()
.arg.day_of_week.index()
.case()
.when(0, "Monday")
.when(1, "Tuesday")
.when(2, "Wednesday")
.when(3, "Thursday")
.when(4, "Friday")
.when(5, "Saturday")
.when(6, "Sunday")
.else_("")
.end()
.nullif("")
)


class ClickhouseCompiler(Compiler):
translator_class = ClickhouseExprTranslator
table_set_formatter_class = ClickhouseTableSetFormatter
Expand Down
8 changes: 8 additions & 0 deletions ibis/backends/clickhouse/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -671,6 +671,13 @@ def _zero_if_null(translator, expr):
return f'ifNull({arg_}, 0)'


def _day_of_week_index(translator, expr):
(arg,) = expr.op().args
weekdays = 7
offset = f"toDayOfWeek({translator.translate(arg)})"
return f"((({offset} - 1) % {weekdays:d}) + {weekdays:d}) % {weekdays:d}"


_undocumented_operations = {
ops.NullLiteral: _null_literal, # undocumented
ops.IsNull: _unary('isNull'),
Expand All @@ -680,6 +687,7 @@ def _zero_if_null(translator, expr):
ops.Coalesce: _varargs('coalesce'),
ops.NullIfZero: _null_if_zero,
ops.ZeroIfNull: _zero_if_null,
ops.DayOfWeekIndex: _day_of_week_index,
}


Expand Down
43 changes: 28 additions & 15 deletions ibis/backends/clickhouse/tests/test_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -398,21 +398,34 @@ def test_translate_math_functions(con, alltypes, translate, call, expected):
@pytest.mark.parametrize(
('expr', 'expected'),
[
(L(-5).abs(), 5),
(L(5).abs(), 5),
(L(5.5).round(), 6.0),
(L(5.556).round(2), 5.56),
(L(5.556).ceil(), 6.0),
(L(5.556).floor(), 5.0),
(L(5.556).exp(), math.exp(5.556)),
(L(5.556).sign(), 1),
(L(-5.556).sign(), -1),
(L(0).sign(), 0),
(L(5.556).sqrt(), math.sqrt(5.556)),
(L(5.556).log(2), math.log(5.556, 2)),
(L(5.556).ln(), math.log(5.556)),
(L(5.556).log2(), math.log(5.556, 2)),
(L(5.556).log10(), math.log10(5.556)),
pytest.param(L(-5).abs(), 5, id="abs_neg"),
pytest.param(L(5).abs(), 5, id="abs"),
pytest.param(L(5.5).round(), 6.0, id="round"),
pytest.param(L(5.556).round(2), 5.56, id="round_places"),
pytest.param(L(5.556).ceil(), 6.0, id="ceil"),
pytest.param(L(5.556).floor(), 5.0, id="floor"),
pytest.param(L(5.556).sign(), 1, id="sign"),
pytest.param(L(-5.556).sign(), -1, id="sign_neg"),
pytest.param(L(0).sign(), 0, id="sign_zero"),
pytest.param(L(5.556).sqrt(), math.sqrt(5.556), id="sqrt"),
pytest.param(L(5.556).log(2), math.log(5.556, 2), id="log2_arg"),
pytest.param(L(5.556).log2(), math.log(5.556, 2), id="log2"),
pytest.param(L(5.556).log10(), math.log10(5.556), id="log10"),
# clickhouse has different functions for exp/ln that are faster
# than the defaults, but less precise
#
# we can't use the e() function as it still gives different results
# from `math.exp`
pytest.param(
L(5.556).exp().round(8),
round(math.exp(5.556), 8),
id="exp",
),
pytest.param(
L(5.556).ln().round(7),
round(math.log(5.556), 7),
id="ln",
),
],
)
def test_math_functions(con, expr, expected, translate):
Expand Down
14 changes: 14 additions & 0 deletions ibis/backends/mysql/registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,18 @@ def _group_concat(t, expr):
return sa.func.group_concat(arg.op('SEPARATOR')(t.translate(sep)))


def _day_of_week_index(t, expr):
(arg,) = expr.op().args
left = sa.func.dayofweek(t.translate(arg)) - 2
right = 7
return ((left % right) + right) % right


def _day_of_week_name(t, expr):
(arg,) = expr.op().args
return sa.func.dayname(t.translate(arg))


operation_registry.update(
{
ops.Literal: _literal,
Expand Down Expand Up @@ -259,5 +271,7 @@ def _group_concat(t, expr):
ops.TimestampNow: fixed_arity(sa.func.now, 0),
# others
ops.GroupConcat: _group_concat,
ops.DayOfWeekIndex: _day_of_week_index,
ops.DayOfWeekName: _day_of_week_name,
}
)
6 changes: 3 additions & 3 deletions ibis/backends/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,11 @@ def pytest_pyfunc_call(pyfuncitem):
NotImplementedError,
) as e:
markers = list(pyfuncitem.iter_markers(name="xfail_unsupported"))
if not markers:
raise
assert (
len(markers) == 1
), "More than one xfail_unsupported marker found on test {}".format(
pyfuncitem
)
), f"More than one xfail_unsupported marker found on test {pyfuncitem}"
(marker,) = markers
backend = pyfuncitem.funcargs["backend"]
assert isinstance(
Expand Down
18 changes: 6 additions & 12 deletions ibis/backends/tests/test_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def test_string_col_is_unicode(backend, alltypes, df):
param(
lambda t: t.string_col.re_search(r'\\d+'),
lambda t: t.string_col.str.contains(r'\d+'),
id='re_search_spark',
id='re_search_spark_raw',
marks=pytest.mark.xpass_backends(
('clickhouse', 'impala', 'spark')
),
Expand All @@ -84,7 +84,7 @@ def test_string_col_is_unicode(backend, alltypes, df):
param(
lambda t: t.string_col.re_replace(r'\\d+', 'a'),
lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True),
id='re_replace_spark',
id='re_replace_spark_raw',
marks=pytest.mark.xpass_backends(
('clickhouse', 'impala', 'spark')
),
Expand All @@ -93,25 +93,19 @@ def test_string_col_is_unicode(backend, alltypes, df):
lambda t: t.string_col.re_search(r'\d+'),
lambda t: t.string_col.str.contains(r'\d+'),
id='re_search_spark',
marks=pytest.mark.xfail_backends(
('clickhouse', 'impala', 'spark')
),
marks=pytest.mark.xfail_backends(('impala', 'spark')),
),
param(
lambda t: t.string_col.re_extract(r'(\d+)', 0),
lambda t: t.string_col.str.extract(r'(\d+)', expand=False),
id='re_extract_spark',
marks=pytest.mark.xfail_backends(
('clickhouse', 'impala', 'spark')
),
id='re_extract_spark_raw',
marks=pytest.mark.xfail_backends(('impala', 'spark')),
),
param(
lambda t: t.string_col.re_replace(r'\d+', 'a'),
lambda t: t.string_col.str.replace(r'\d+', 'a', regex=True),
id='re_replace_spark',
marks=pytest.mark.xfail_backends(
('clickhouse', 'impala', 'spark')
),
marks=pytest.mark.xfail_backends(('impala', 'spark')),
),
param(
lambda t: t.string_col.repeat(2),
Expand Down

0 comments on commit 9297828

Please sign in to comment.