Skip to content

Commit

Permalink
feat(clickhouse): properly support native boolean types
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Jan 11, 2023
1 parent d44978c commit 31cc7ba
Show file tree
Hide file tree
Showing 7 changed files with 36 additions and 11 deletions.
3 changes: 3 additions & 0 deletions ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,12 @@ class Options(ibis.config.Config):
----------
temp_db : str
Database to use for temporary objects.
bool_type : str
Name of the type to use for boolean columns (defaults to ``Boolean``).
"""

temp_db: str = "__ibis_tmp"
bool_type: str = "Boolean"

def __init__(self, *args, external_tables=None, **kwargs):
super().__init__(*args, **kwargs)
Expand Down
15 changes: 13 additions & 2 deletions ibis/backends/clickhouse/datatypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import parsy

import ibis
import ibis.expr.datatypes as dt
from ibis.common.parsing import (
COMMA,
Expand All @@ -19,6 +20,10 @@
)


def _bool_type() -> str:
    """Return the type name configured for ClickHouse boolean columns.

    Reads ``ibis.options.clickhouse.bool_type``. Falls back to ``"Boolean"``
    when either the ``clickhouse`` options namespace or its ``bool_type``
    attribute is missing.
    """
    # The clickhouse options namespace may be absent; ``None`` then makes the
    # second lookup fall through to the default.
    clickhouse_options = getattr(ibis.options, "clickhouse", None)
    return getattr(clickhouse_options, "bool_type", "Boolean")


def parse(text: str) -> dt.DataType:
@parsy.generate
def datetime():
Expand All @@ -35,8 +40,9 @@ def datetime():
| spaceless_string("smallint", "int16", "int2").result(dt.Int16(nullable=False))
| spaceless_string("date32", "date").result(dt.Date(nullable=False))
| spaceless_string("time").result(dt.Time(nullable=False))
| spaceless_string("tinyint", "int8", "int1", "boolean", "bool").result(
dt.Int8(nullable=False)
| spaceless_string("tinyint", "int8", "int1").result(dt.Int8(nullable=False))
| spaceless_string("boolean", "bool").result(
getattr(dt, _bool_type())(nullable=False)
)
| spaceless_string("integer", "int32", "int4", "int").result(
dt.Int32(nullable=False)
Expand Down Expand Up @@ -223,6 +229,11 @@ def _(ty: dt.DataType) -> str:
return type(ty).__name__.capitalize()


@serialize_raw.register(dt.Boolean)
def _(_: dt.Boolean) -> str:
    """Serialize a boolean datatype to the configured boolean type name.

    The datatype instance itself is ignored; the result comes entirely from
    ``_bool_type()``.
    """
    return _bool_type()


@serialize_raw.register(dt.Array)
def _(ty: dt.Array) -> str:
return f"Array({serialize(ty.value_type)})"
Expand Down
6 changes: 5 additions & 1 deletion ibis/backends/clickhouse/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@ class TestConf(UnorderedComparator, BackendTest, RoundHalfToEven):
returned_timestamp_unit = 's'
supported_to_timestamp_units = {'s'}
supports_floating_modulus = False
bool_is_int = True
supports_json = False

@property
def native_bool(self) -> bool:
    """Report whether ``SELECT true`` comes back as a Python ``bool``.

    Runs ``SELECT true`` through ``self.connection._client`` and checks the
    type of the single returned value.

    Raises
    ------
    ValueError
        If the result is not exactly one row containing one column (raised
        by the destructuring assignment).
    """
    rows = self.connection._client.execute("SELECT true")
    # Destructure strictly: exactly one row with exactly one value.
    [(value,)] = rows
    return isinstance(value, bool)

@staticmethod
def _load_data(
data_dir: Path,
Expand Down
15 changes: 11 additions & 4 deletions ibis/backends/clickhouse/tests/test_operators.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pandas as pd
import pandas.testing as tm
import pytest
from pytest import param

import ibis
import ibis.expr.datatypes as dt
Expand Down Expand Up @@ -156,11 +157,17 @@ def test_field_in_literals(con, alltypes, translate, container):
assert len(con.execute(expr))


@pytest.mark.parametrize('column', ['int_col', 'float_col', 'bool_col'])
def test_negate(con, alltypes, translate, column):
# clickhouse represent boolean as UInt8
@pytest.mark.parametrize(
("column", "operator"),
[
param("int_col", "-", id="int_col"),
param("float_col", "-", id="float_col"),
param("bool_col", "NOT ", id="bool_col"),
],
)
def test_negate(con, alltypes, translate, column, operator):
expr = -alltypes[column]
assert translate(expr.op()) == f'-{column}'
assert translate(expr.op()) == f"{operator}{column}"
assert len(con.execute(expr))


Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/datafusion/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ class TestConf(BackendTest, RoundAwayFromZero):
# additional_skipped_operations = frozenset({ops.StringSQLLike})
# supports_divide_by_zero = True
# returned_timestamp_unit = 'ns'
bool_is_int = True
native_bool = False
supports_structs = False
supports_json = False

Expand Down
2 changes: 1 addition & 1 deletion ibis/backends/mysql/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ class TestConf(BackendTest, RoundHalfToEven):
returned_timestamp_unit = 's'
supports_arrays = False
supports_arrays_outside_of_select = supports_arrays
bool_is_int = True
native_bool = False
supports_structs = False

def __init__(self, data_directory: Path) -> None:
Expand Down
4 changes: 2 additions & 2 deletions ibis/backends/tests/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class BackendTest(abc.ABC):
returned_timestamp_unit = 'us'
supported_to_timestamp_units = {'s', 'ms', 'us'}
supports_floating_modulus = True
bool_is_int = False
native_bool = True
supports_structs = True
supports_json = True
reduction_tolerance = 1e-7
Expand Down Expand Up @@ -163,7 +163,7 @@ def least(f: Callable[..., ir.Value], *args: ir.Value) -> ir.Value:
@property
def functional_alltypes(self) -> ir.Table:
t = self.connection.table('functional_alltypes')
if self.bool_is_int:
if not self.native_bool:
return t.mutate(bool_col=t.bool_col == 1)
return t

Expand Down

0 comments on commit 31cc7ba

Please sign in to comment.