From 2eb8c1be8de5c6269171c217991559394afd87a1 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Fri, 2 Oct 2015 22:54:12 -0700 Subject: [PATCH] Introducing time comparison --- panoramix/bin/panoramix | 7 +- panoramix/forms.py | 35 +++++++--- panoramix/models.py | 21 ++++-- panoramix/static/widgets/viz_nvd3.js | 4 +- panoramix/templates/panoramix/datasource.html | 3 + panoramix/viz.py | 69 ++++++++++++++----- 6 files changed, 106 insertions(+), 33 deletions(-) diff --git a/panoramix/bin/panoramix b/panoramix/bin/panoramix index b897bca22445a..461341e8ca198 100755 --- a/panoramix/bin/panoramix +++ b/panoramix/bin/panoramix @@ -1,6 +1,7 @@ #!/usr/bin/env python import csv +from datetime import datetime import gzip import json from subprocess import Popen @@ -8,7 +9,7 @@ from subprocess import Popen from flask.ext.script import Manager from flask.ext.migrate import MigrateCommand from panoramix import db -from sqlalchemy import Column, Integer, String, Table +from sqlalchemy import Column, Integer, String, Table, DateTime from panoramix import app from panoramix import models @@ -59,7 +60,7 @@ def load_examples(sample): Column("year", Integer), Column("name", String(128)), Column("num", Integer), - Column("ds", String(20)), + Column("ds", DateTime), Column("gender", String(10)), Column("sum_boys", Integer), Column("sum_girls", Integer), @@ -78,7 +79,7 @@ def load_examples(sample): continue if num == "NA": num = 0 - ds = str(year) + '-01-01' + ds = datetime(int(year), 1, 1) db.engine.execute( BirthNames.insert(), state=state, diff --git a/panoramix/forms.py b/panoramix/forms.py index b38821e8fd00f..0a0b45d66cda6 100644 --- a/panoramix/forms.py +++ b/panoramix/forms.py @@ -1,6 +1,7 @@ from wtforms import ( Field, Form, SelectMultipleField, SelectField, TextField, TextAreaField, BooleanField, IntegerField, HiddenField) +from wtforms import validators from copy import copy from panoramix import app config = app.config @@ -58,7 +59,7 @@ def __init__(self, viz): description="One or many metrics to display"), 'groupby': SelectMultipleField( 'Group by', - choices=[(s, s) for s in datasource.groupby_column_names], + choices=self.choicify(datasource.groupby_column_names), description="One or many fields to group by"), 'granularity': TextField( 'Time Granularity', default="one day", @@ -75,19 +76,24 @@ def __init__(self, viz): SelectField( 'Row limit', default=config.get("ROW_LIMIT"), - choices=[(s, s) for s in self.row_limits]), + choices=self.choicify(self.row_limits)), 'limit': SelectField( - 'Series limit', choices=[(s, s) for s in self.series_limits], + 'Series limit', + choices=self.choicify(self.series_limits), default=50, description=( "Limits the number of time series that get displayed")), 'rolling_type': SelectField( 'Rolling', + default='mean', choices=[(s, s) for s in ['mean', 'sum', 'std']], description=( "Defines a rolling window function to apply")), - 'rolling_periods': TextField('Periods', description=( + 'rolling_periods': IntegerField( + 'Periods', + validators=[validators.optional()], + description=( "Defines the size of the rolling window function, " "relative to the 'granularity' field")), 'series': SelectField( @@ -118,7 +124,7 @@ def __init__(self, viz): description="Suffix to apply after the percentage display"), 'markup_type': SelectField( "Markup Type", - choices=[(s, s) for s in ['markdown', 'html']], + choices=self.choicify(['markdown', 'html']), default="markdown", description="Pick your favorite markup language"), 'rotation': SelectField( @@ -128,9 +134,9 @@ def __init__(self, viz): description="Rotation to apply to words in the cloud"), 'line_interpolation': SelectField( "Line Interpolation", - choices=[(s, s) for s in [ + choices=self.choicify([ 'linear', 'basis', 'cardinal', 'monotone', - 'step-before', 'step-after']], + 'step-before', 'step-after']), default='linear', description="Line interpolation as defined by d3.js"), 'code': TextAreaField("Code", description="Put your code here"), @@ -168,11 +174,24 @@ def __init__(self, viz): description="Compute the contribution to the total"), 'num_period_compare': IntegerField( "Period Ratio", default=None, + validators=[validators.optional()], description=( - "Number of period to compare against, " + "[integer] Number of period to compare against, " "this is relative to the granularity selected")), + 'time_compare': TextField( + "Time Shift Compare", + default="1 week ago", + description=( + "Overlay a timeseries from a " + "relative time period. Expects relative time delta " + "in natural language (example: 24 hours, 7 days, " + "56 weeks, 365 days")), } + @staticmethod + def choicify(l): + return [("{}".format(obj), "{}".format(obj)) for obj in l] + def get_form(self, previous=False): px_form_fields = self.field_dict viz = self.viz diff --git a/panoramix/models.py b/panoramix/models.py index 91dcd7a0ffccb..3c3a24b4878b5 100644 --- a/panoramix/models.py +++ b/panoramix/models.py @@ -327,6 +327,7 @@ def query( filter=None, is_timeseries=True, timeseries_limit=15, row_limit=None, + inner_from_dttm=None, inner_to_dttm=None, extras=None): qry_start_dttm = datetime.now() @@ -363,10 +364,17 @@ def query( from_clause = table(self.table_name) qry = qry.group_by(*groupby_exprs) - where_clause_and = [ + time_filter = [ timestamp >= from_dttm.isoformat(), - timestamp < to_dttm.isoformat(), + timestamp <= to_dttm.isoformat(), ] + inner_time_filter = copy(time_filter) + if inner_from_dttm: + inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat() + if inner_to_dttm: + inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat() + + where_clause_and = [] for col, op, eq in filter: if op in ('in', 'not in'): values = eq.split(",") @@ -376,14 +384,14 @@ def query( where_clause_and.append(cond) if extras and 'where' in extras: where_clause_and += [text(extras['where'])] - qry = qry.where(and_(*where_clause_and)) + qry = qry.where(and_(*(time_filter + where_clause_and))) qry = qry.order_by(desc(main_metric_expr)) qry = qry.limit(row_limit) if timeseries_limit and groupby: subq = select(inner_groupby_exprs) subq = subq.select_from(table(self.table_name)) - subq = subq.where(and_(*where_clause_and)) + subq = subq.where(and_(*(where_clause_and + inner_time_filter))) subq = subq.group_by(*inner_groupby_exprs) subq = subq.order_by(desc(main_metric_expr)) subq = subq.limit(timeseries_limit) @@ -677,9 +685,13 @@ def query( is_timeseries=True, timeseries_limit=None, row_limit=None, + inner_from_dttm=None, inner_to_dttm=None, extras=None): qry_start_dttm = datetime.now() + inner_from_dttm = inner_from_dttm or from_dttm + inner_to_dttm = inner_to_dttm or to_dttm + # add tzinfo to native datetime with config from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ")) to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ")) @@ -738,6 +750,7 @@ def query( pre_qry['limit_spec'] = { "type": "default", "limit": timeseries_limit, + 'intervals': inner_from_dttm.isoformat() + '/' + inner_to_dttm.isoformat(), "columns": [{ "dimension": metrics[0] if metrics else self.metrics[0], "direction": "descending", diff --git a/panoramix/static/widgets/viz_nvd3.js b/panoramix/static/widgets/viz_nvd3.js index 14dbd9922147f..2d7e4547f5436 100644 --- a/panoramix/static/widgets/viz_nvd3.js +++ b/panoramix/static/widgets/viz_nvd3.js @@ -50,7 +50,9 @@ function viz_nvd3(token_name, json_callback) { chart.yAxis.tickFormat(d3.format('.3s')); if (viz.form_data.contribution || viz.form_data.num_period_compare) { chart.yAxis.tickFormat(d3.format('.3p')); - chart.y2Axis.tickFormat(d3.format('.3p')); + if (chart.y2Axis != undefined) { + chart.y2Axis.tickFormat(d3.format('.3p')); + } } } else if (viz_type === 'dist_bar') { diff --git a/panoramix/templates/panoramix/datasource.html b/panoramix/templates/panoramix/datasource.html index 7beb01c0f9f83..cedb23deaaf48 100644 --- a/panoramix/templates/panoramix/datasource.html +++ b/panoramix/templates/panoramix/datasource.html @@ -98,6 +98,9 @@

{{ viz.verbose_name }} data-toggle="modal" data-target="#query_modal">query


+ {% block messages %} + {% endblock %} + {% include 'appbuilder/flash.html' %}
{% block viz_html %} {% if viz.error_msg %} diff --git a/panoramix/viz.py b/panoramix/viz.py index 631578fc5f2f3..45006cc51ba6a 100644 --- a/panoramix/viz.py +++ b/panoramix/viz.py @@ -1,4 +1,5 @@ from collections import OrderedDict, defaultdict +from copy import copy from datetime import datetime import json import uuid @@ -40,6 +41,11 @@ def __init__(self, datasource, form_data): form = form_class(form_data) else: form = form_class(**form_data) + if not form.validate(): + for k, v in form.errors.items(): + if not k.startswith("flt") and not form_data.get('token'): + flash("{}: {}".format(k, " ".join(v)), 'danger') + data = form.data.copy() previous_viz_type = form_data.get('previous_viz_type') if previous_viz_type in viz_types and previous_viz_type != self.viz_type: @@ -85,11 +91,14 @@ def get_url(self, **kwargs): '{self.datasource.id}/'.format(**locals())) return href(d) - def get_df(self): + def get_df(self, query_obj=None): + if not query_obj: + query_obj = self.query_obj() + self.error_msg = "" self.results = None - self.results = self.bake_query() + self.results = self.datasource.query(**query_obj) df = self.results.df if df is None or df.empty: raise Exception("No data, review your incantations!") @@ -118,9 +127,6 @@ def query_filters(self): filters.append((col, op, eq)) return filters - def bake_query(self): - return self.datasource.query(**self.query_obj()) - def query_obj(self): """ Building a query object @@ -260,7 +266,10 @@ class NVD3Viz(BaseViz): 'nv.d3.min.js', 'widgets/viz_nvd3.js', ] - css_files = ['nv.d3.css'] + css_files = [ + 'nv.d3.css', + 'widgets/viz_nvd3.css', + ] class BubbleViz(NVD3Viz): @@ -387,34 +396,36 @@ class NVD3TimeSeriesViz(NVD3Viz): 'metrics', 'groupby', 'limit', ('rolling_type', 'rolling_periods'), - ('num_period_compare', 'line_interpolation'), + ('time_compare', 'num_period_compare'), + ('line_interpolation', None), ('show_brush', 'show_legend'), ('rich_tooltip', 'y_axis_zero'), - ('y_log_scale', 'contribution') + ('y_log_scale', 'contribution'), ] - def get_df(self): + def get_df(self, query_obj=None): form_data = self.form_data - df = super(NVD3TimeSeriesViz, self).get_df() + df = super(NVD3TimeSeriesViz, self).get_df(query_obj) + df = df.fillna(0) if form_data.get("granularity") == "all": raise Exception("Pick a time granularity for your time series") df = df.pivot_table( index="timestamp", - columns=self.form_data.get('groupby'), - values=self.form_data.get('metrics')) + columns=form_data.get('groupby'), + values=form_data.get('metrics')) if self.sort_series: dfs = df.sum() dfs.sort(ascending=False) df = df[dfs.index] - if self.form_data.get("contribution") == "y": + if form_data.get("contribution"): dft = df.T df = (dft / dft.sum()).T - num_period_compare = self.form_data.get("num_period_compare") + num_period_compare = form_data.get("num_period_compare") if num_period_compare: num_period_compare = int(num_period_compare) df = df / df.shift(num_period_compare) @@ -431,8 +442,7 @@ def get_df(self): df = pd.rolling_sum(df, int(rolling_periods)) return df - def get_json_data(self): - df = self.get_df() + def to_series(self, df, classed='', title_suffix=''): series = df.to_dict('series') chart_data = [] for name in df.T.index.tolist(): @@ -448,14 +458,39 @@ def get_json_data(self): series_title = ", ".join(name) else: series_title = ", ".join(name[1:]) + color = utils.color(series_title) + if title_suffix: + series_title += title_suffix + d = { "key": series_title, - "color": utils.color(series_title), + "color": color, + "classed": classed, "values": [ {'x': ds, 'y': ys[i]} for i, ds in enumerate(df.timestamp)] } chart_data.append(d) + return chart_data + + def get_json_data(self): + df = self.get_df() + chart_data = self.to_series(df) + + time_compare = self.form_data.get('time_compare') + if time_compare: + query_object = self.query_obj() + delta = utils.parse_human_timedelta(time_compare) + query_object['inner_from_dttm'] = query_object['from_dttm'] + query_object['inner_to_dttm'] = query_object['to_dttm'] + query_object['from_dttm'] -= delta + query_object['to_dttm'] -= delta + df2 = self.get_df(query_object) + df2.index += delta + chart_data += self.to_series( + df2, classed='dashed', title_suffix="---") + chart_data = sorted(chart_data, key=lambda x: x['key']) + data = { 'chart_data': chart_data, 'query': self.results.query,