From a5b72934c38d3a6422274737d5b766d7b4dd9766 Mon Sep 17 00:00:00 2001
From: Phillip Cloud <417981+cpcloud@users.noreply.github.com>
Date: Tue, 15 Mar 2022 12:23:26 -0400
Subject: [PATCH] feat(clickhouse): implement trim, pad and string predicates

---
Notes (placed after the `---` marker, so not part of the commit message):

* registry.py adds translators that render
    StringSQLILike -> lower(<arg>) LIKE lower(<pattern>)
    StartsWith     -> startsWith(<arg>, <start>)
    EndsWith       -> endsWith(<arg>, <end>)
    LPad / RPad    -> leftPad / rightPad(<arg>, <length>, <pad>)
  and registers LStrip / RStrip / Strip through `_unary` as
  trimLeft / trimRight / trimBoth.
* test_string.py drops "clickhouse" from the `notimpl` marks of the ilike,
  lpad, rpad, startswith, endswith, strip, lstrip and rstrip cases.

 ibis/backends/clickhouse/registry.py | 48 ++++++++++++++++++++++++++++
 ibis/backends/tests/test_string.py   | 30 ++---------------
 2 files changed, 51 insertions(+), 27 deletions(-)

diff --git a/ibis/backends/clickhouse/registry.py b/ibis/backends/clickhouse/registry.py
index b4914f844951..a9ab309a6410 100644
--- a/ibis/backends/clickhouse/registry.py
+++ b/ibis/backends/clickhouse/registry.py
@@ -566,6 +566,46 @@ def _string_like(translator, expr):
     )
 
 
+def _string_ilike(translator, expr):
+    op = expr.op()
+    return 'lower({}) LIKE lower({})'.format(
+        translator.translate(op.arg),
+        translator.translate(op.pattern),
+    )
+
+
+def _startswith(translator, expr):
+    op = expr.op()
+    arg = op.arg
+    start = op.start
+    tr_arg = translator.translate(arg)
+    tr_start = translator.translate(start)
+    return f"startsWith({tr_arg}, {tr_start})"
+
+
+def _endswith(translator, expr):
+    op = expr.op()
+    arg = translator.translate(op.arg)
+    end = translator.translate(op.end)
+    return f"endsWith({arg}, {end})"
+
+
+def _lpad(translator, expr):
+    op = expr.op()
+    arg = translator.translate(op.arg)
+    length = translator.translate(op.length)
+    pad = translator.translate(op.pad)
+    return f"leftPad({arg}, {length}, {pad})"
+
+
+def _rpad(translator, expr):
+    op = expr.op()
+    arg = translator.translate(op.arg)
+    length = translator.translate(op.length)
+    pad = translator.translate(op.pad)
+    return f"rightPad({arg}, {length}, {pad})"
+
+
 def _group_concat(translator, expr):
     arg, sep, where = expr.op().args
     if where is not None:
@@ -646,6 +686,14 @@ def _group_concat(translator, expr):
     ops.StringJoin: _string_join,
     ops.StringSplit: _string_split,
     ops.StringSQLLike: _string_like,
+    ops.StringSQLILike: _string_ilike,
+    ops.StartsWith: _startswith,
+    ops.EndsWith: _endswith,
+    ops.LPad: _lpad,
+    ops.RPad: _rpad,
+    ops.LStrip: _unary('trimLeft'),
+    ops.RStrip: _unary('trimRight'),
+    ops.Strip: _unary('trimBoth'),
     ops.Repeat: _string_repeat,
     ops.RegexSearch: _fixed_arity('match', 2),
     # TODO: extractAll(haystack, pattern)[index + 1]
diff --git a/ibis/backends/tests/test_string.py b/ibis/backends/tests/test_string.py
index f24823708b7c..e2d0c8cc65c4 100644
--- a/ibis/backends/tests/test_string.py
+++ b/ibis/backends/tests/test_string.py
@@ -48,14 +48,7 @@ def test_string_col_is_unicode(backend, alltypes, df):
             lambda t: t.string_col.ilike('6%'),
             lambda t: t.string_col.str.contains('6.*'),
             id='ilike',
-            marks=pytest.mark.notimpl(
-                [
-                    "clickhouse",
-                    "datafusion",
-                    "impala",
-                    "pyspark",
-                ]
-            ),
+            marks=pytest.mark.notimpl(["datafusion", "impala", "pyspark"]),
         ),
         param(
             lambda t: t.string_col.re_search(r'[[:digit:]]+'),
@@ -124,13 +117,11 @@ def test_string_col_is_unicode(backend, alltypes, df):
             lambda t: t.string_col.lpad(10, 'a'),
             lambda t: t.string_col.str.pad(10, fillchar='a', side='left'),
             id='lpad',
-            marks=pytest.mark.notimpl(["clickhouse"]),
         ),
         param(
             lambda t: t.string_col.rpad(10, 'a'),
             lambda t: t.string_col.str.pad(10, fillchar='a', side='right'),
             id='rpad',
-            marks=pytest.mark.notimpl(["clickhouse"]),
         ),
         param(
             lambda t: t.string_col.find_in_set(['1']),
@@ -179,43 +170,28 @@ def test_string_col_is_unicode(backend, alltypes, df):
             lambda t: t.string_col.startswith('foo'),
             lambda t: t.string_col.str.startswith('foo'),
             id='startswith',
-            marks=pytest.mark.notimpl(
-                ["clickhouse", "dask", "datafusion", "pandas"]
-            ),
+            marks=pytest.mark.notimpl(["dask", "datafusion", "pandas"]),
         ),
         param(
             lambda t: t.string_col.endswith('foo'),
             lambda t: t.string_col.str.endswith('foo'),
             id='endswith',
-            marks=pytest.mark.notimpl(
-                ["clickhouse", "dask", "datafusion", "pandas"]
-            ),
+            marks=pytest.mark.notimpl(["dask", "datafusion", "pandas"]),
         ),
         param(
             lambda t: t.string_col.strip(),
             lambda t: t.string_col.str.strip(),
             id='strip',
-            marks=pytest.mark.notimpl(["clickhouse"]),
         ),
         param(
             lambda t: t.string_col.lstrip(),
             lambda t: t.string_col.str.lstrip(),
             id='lstrip',
-            marks=pytest.mark.notimpl(
-                [
-                    "clickhouse",
-                ]
-            ),
         ),
         param(
             lambda t: t.string_col.rstrip(),
             lambda t: t.string_col.str.rstrip(),
             id='rstrip',
-            marks=pytest.mark.notimpl(
-                [
-                    "clickhouse",
-                ]
-            ),
         ),
         param(
             lambda t: t.string_col.capitalize(),