Skip to content

Commit

Permalink
feat(clickhouse): add dataframe external table support for memtables
Browse files Browse the repository at this point in the history
  • Loading branch information
cpcloud committed Sep 12, 2022
1 parent 422c98d commit bc86aa7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 16 deletions.
9 changes: 6 additions & 3 deletions ibis/backends/base/sql/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,9 +186,7 @@ def execute(

# register all in memory tables if the backend supports cheap access
# to them
if self.compiler.cheap_in_memory_tables:
for memtable in lin.traverse(_find_memtables, expr):
self._register_in_memory_table(memtable)
self._register_in_memory_tables(expr)

with self._safe_raw_sql(sql, **kwargs) as cursor:
result = self.fetch_from_cursor(cursor, schema)
Expand All @@ -201,6 +199,11 @@ def execute(
def _register_in_memory_table(self, table_op):
raise NotImplementedError

def _register_in_memory_tables(self, expr):
if self.compiler.cheap_in_memory_tables:
for memtable in lin.traverse(_find_memtables, expr):
self._register_in_memory_table(memtable)

@abc.abstractmethod
def fetch_from_cursor(self, cursor, schema):
"""Fetch data from cursor."""
Expand Down
14 changes: 13 additions & 1 deletion ibis/backends/clickhouse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Any, Literal, Mapping

import pandas as pd
import toolz
from clickhouse_driver.client import Client as _DriverClient
from pydantic import Field

Expand Down Expand Up @@ -38,6 +39,13 @@ class Options(ibis.config.BaseModel):
description="Database to use for temporary objects.",
)

def __init__(self, *args, external_tables=None, **kwargs):
    """Initialise the backend and its registry of external tables.

    Parameters
    ----------
    *args, **kwargs
        Forwarded unchanged to the parent initialiser.
    external_tables
        Optional mapping of table name to table data.  ``None`` (or any
        falsy value) means no external tables.
    """
    super().__init__(*args, **kwargs)
    # Store a copy: `_register_in_memory_table` adds entries to this
    # registry later, and that must not mutate the caller's mapping.
    self._external_tables = dict(external_tables or {})

def _register_in_memory_table(self, table_op):
self._external_tables[table_op.name] = table_op.data.to_frame()

def do_connect(
self,
host: str = "localhost",
Expand All @@ -49,6 +57,7 @@ def do_connect(
compression: (
Literal["lz4", "lz4hc", "quicklz", "zstd"] | bool
) = _default_compression,
external_tables=None,
**kwargs: Any,
):
"""Create a ClickHouse client for use with Ibis.
Expand Down Expand Up @@ -92,6 +101,7 @@ def do_connect(
compression=compression,
**kwargs,
)
self._external_tables = external_tables or {}

@property
def version(self) -> str:
Expand Down Expand Up @@ -145,7 +155,9 @@ def raw_sql(
external_tables_list = []
if external_tables is None:
external_tables = {}
for name, df in external_tables.items():
for name, df in toolz.merge(
self._external_tables, external_tables
).items():
if not isinstance(df, pd.DataFrame):
raise TypeError(
'External table is not an instance of pandas dataframe'
Expand Down
6 changes: 6 additions & 0 deletions ibis/backends/clickhouse/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,11 @@ class ClickhouseTableSetFormatter(TableSetFormatter):

_non_equijoin_supported = False

def _format_in_memory_table(self, op):
# We register in memory tables as external tables because clickhouse
# doesn't implement a generic VALUES statement
return op.name


class ClickhouseExprTranslator(ExprTranslator):
_registry = operation_registry
Expand Down Expand Up @@ -118,6 +123,7 @@ def day_of_week_name(expr):


class ClickhouseCompiler(Compiler):
cheap_in_memory_tables = True
translator_class = ClickhouseExprTranslator
table_set_formatter_class = ClickhouseTableSetFormatter
select_builder_class = ClickhouseSelectBuilder
Expand Down
12 changes: 0 additions & 12 deletions ibis/backends/tests/test_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,10 +601,6 @@ def test_deprecated_path_argument(backend, tmp_path):
),
],
)
@pytest.mark.notyet(
["clickhouse"],
reason="ClickHouse doesn't support a VALUES construct",
)
@pytest.mark.notyet(
["mysql", "sqlite"],
reason="SQLAlchemy generates incorrect code for `VALUES` projections.",
Expand All @@ -616,10 +612,6 @@ def test_in_memory_table(backend, con, expr, expected):
backend.assert_frame_equal(result, expected)


@pytest.mark.notyet(
["clickhouse"],
reason="ClickHouse doesn't support a VALUES construct",
)
@pytest.mark.notyet(
["mysql", "sqlite"],
reason="SQLAlchemy generates incorrect code for `VALUES` projections.",
Expand All @@ -634,10 +626,6 @@ def test_filter_memory_table(backend, con):
backend.assert_frame_equal(result, expected)


@pytest.mark.notyet(
["clickhouse"],
reason="ClickHouse doesn't support a VALUES construct",
)
@pytest.mark.notyet(
["mysql", "sqlite"],
reason="SQLAlchemy generates incorrect code for `VALUES` projections.",
Expand Down

0 comments on commit bc86aa7

Please sign in to comment.