Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Basic integration with ClickHouse #1844

Merged
merged 7 commits into from
May 3, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,9 @@ Here's a list of some of the recommended packages.
| Vertica | ``pip install | ``vertica+vertica_python://`` |
| | sqlalchemy-vertica-python`` | |
+---------------+-------------------------------------+-------------------------------------------------+
| ClickHouse | ``pip install | ``clickhouse://`` |
| | sqlalchemy-clickhouse`` | |
+---------------+-------------------------------------+-------------------------------------------------+

Note that many other databases are supported, the main criteria being the
existence of a functional SqlAlchemy dialect and Python driver. Googling
Expand Down
22 changes: 16 additions & 6 deletions superset/connectors/sqla/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,9 @@ def get_sqla_query( # sqla
if granularity not in self.dttm_cols:
granularity = self.main_dttm_col

# Database spec supports join-free timeslot grouping
time_groupby_inline = self.database.db_engine_spec.time_groupby_inline

cols = {col.column_name: col for col in self.columns}
metrics_dict = {m.metric_name: m for m in self.metrics}

Expand All @@ -386,7 +389,7 @@ def get_sqla_query( # sqla
if timeseries_limit_metric:
timeseries_limit_metric_expr = \
timeseries_limit_metric.sqla_col
if metrics:
if metrics_exprs:
main_metric_expr = metrics_exprs[0]
else:
main_metric_expr = literal_column("COUNT(*)").label("ccount")
Expand Down Expand Up @@ -436,13 +439,20 @@ def visit_column(element, compiler, **kw):

dttm_col = cols[granularity]
time_grain = extras.get('time_grain_sqla')
time_filters = []

if is_timeseries:
timestamp = dttm_col.get_timestamp_expression(time_grain)
select_exprs += [timestamp]
groupby_exprs += [timestamp]

time_filter = dttm_col.get_time_filter(from_dttm, to_dttm)
# Use main dttm column to support index with secondary dttm columns
if self.database.db_engine_spec.time_secondary_columns and \
self.main_dttm_col in self.dttm_cols and \
self.main_dttm_col != dttm_col.column_name:
time_filters.append(cols[self.main_dttm_col].
get_time_filter(from_dttm, to_dttm))
time_filters.append(dttm_col.get_time_filter(from_dttm, to_dttm))

select_exprs += metrics_exprs
qry = sa.select(select_exprs)
Expand Down Expand Up @@ -509,7 +519,7 @@ def visit_column(element, compiler, **kw):
having = template_processor.process_template(having)
having_clause_and += [sa.text('({})'.format(having))]
if granularity:
qry = qry.where(and_(*([time_filter] + where_clause_and)))
qry = qry.where(and_(*(time_filters + where_clause_and)))
else:
qry = qry.where(and_(*where_clause_and))
qry = qry.having(and_(*having_clause_and))
Expand All @@ -522,7 +532,8 @@ def visit_column(element, compiler, **kw):

qry = qry.limit(row_limit)

if is_timeseries and timeseries_limit and groupby:
if is_timeseries and \
timeseries_limit and groupby and not time_groupby_inline:
# some sql dialects require for order by expressions
# to also be in the select clause -- others, e.g. vertica,
# require a unique inner alias
Expand Down Expand Up @@ -620,8 +631,7 @@ def fetch_metadata(self):
if not any_date_col and dbcol.is_time:
any_date_col = col.name

quoted = "{}".format(
column(dbcol.column_name).compile(dialect=db_dialect))
quoted = "{}".format(col.compile(dialect=db_dialect))
if dbcol.sum:
metrics.append(M(
metric_name='sum__' + dbcol.column_name,
Expand Down
40 changes: 40 additions & 0 deletions superset/db_engine_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,9 @@ class BaseEngineSpec(object):
engine = 'base' # str as defined in sqlalchemy.engine.engine
cursor_execute_kwargs = {}
time_grains = tuple()
time_groupby_inline = False
limit_method = LimitMethod.FETCH_MANY
time_secondary_columns = False

@classmethod
def fetch_data(cls, cursor, limit):
Expand Down Expand Up @@ -865,6 +867,44 @@ def convert_dttm(cls, target_type, dttm):
def epoch_to_dttm(cls):
return "from_unixtime({col})"


class ClickHouseEngineSpec(BaseEngineSpec):
    """Engine spec for the ClickHouse analytical database."""

    engine = 'clickhouse'

    # Filter on the main dttm column in addition to a secondary dttm
    # column, and group by time expressions inline (no subquery join).
    time_secondary_columns = True
    time_groupby_inline = True

    # Every grain uses its own name as the (translated) label, so build
    # the tuple from (name, expression) pairs instead of spelling out
    # each Grain call.
    time_grains = tuple(
        Grain(name, _(name), expr)
        for name, expr in (
            ('Time Column', '{col}'),
            ('minute', "toStartOfMinute(toDateTime({col}))"),
            ('5 minute',
             "toDateTime(intDiv(toUInt32(toDateTime({col})), 300)*300)"),
            ('10 minute',
             "toDateTime(intDiv(toUInt32(toDateTime({col})), 600)*600)"),
            ('hour', "toStartOfHour(toDateTime({col}))"),
            ('day', "toStartOfDay(toDateTime({col}))"),
            ('month', "toStartOfMonth(toDateTime({col}))"),
            ('quarter', "toStartOfQuarter(toDateTime({col}))"),
            ('year', "toStartOfYear(toDateTime({col}))"),
        )
    )

    @classmethod
    def convert_dttm(cls, target_type, dttm):
        """Render ``dttm`` as a ClickHouse literal for the given column type.

        DATE columns get a ``toDate(...)`` literal, DATETIME columns a
        ``toDateTime(...)`` literal; anything else falls back to a plain
        quoted datetime string.
        """
        target = target_type.upper()
        if target == 'DATE':
            return "toDate('{}')".format(dttm.strftime('%Y-%m-%d'))
        datetime_str = dttm.strftime('%Y-%m-%d %H:%M:%S')
        if target == 'DATETIME':
            return "toDateTime('{}')".format(datetime_str)
        return "'{}'".format(datetime_str)

# Registry mapping each spec's engine identifier (e.g. 'clickhouse') to
# its BaseEngineSpec subclass, collected from everything defined in this
# module.
engines = {
    spec.engine: spec
    for spec in globals().values()
    if inspect.isclass(spec) and issubclass(spec, BaseEngineSpec)
}
4 changes: 3 additions & 1 deletion superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ def query_obj(self):
# [column_name in list_of_values]. `__` prefix is there to avoid
# potential conflicts with column that would be named `from` or `to`
since = (
extra_filters.get('__from') or form_data.get("since", "1 year ago")
extra_filters.get('__from') or
form_data.get("since") or
config.get("SUPERSET_DEFAULT_SINCE", "1 year ago")
)

from_dttm = utils.parse_human_datetime(since)
Expand Down