From 2eb8c1be8de5c6269171c217991559394afd87a1 Mon Sep 17 00:00:00 2001
From: Maxime Beauchemin <maximebeauchemin@gmail.com>
Date: Fri, 2 Oct 2015 22:54:12 -0700
Subject: [PATCH] Introducing time comparison

---
 panoramix/bin/panoramix                       |  7 +-
 panoramix/forms.py                            | 35 +++++++---
 panoramix/models.py                           | 21 ++++--
 panoramix/static/widgets/viz_nvd3.js          |  4 +-
 panoramix/templates/panoramix/datasource.html |  3 +
 panoramix/viz.py                              | 69 ++++++++++++++-----
 6 files changed, 106 insertions(+), 33 deletions(-)

diff --git a/panoramix/bin/panoramix b/panoramix/bin/panoramix
index b897bca22445a..461341e8ca198 100755
--- a/panoramix/bin/panoramix
+++ b/panoramix/bin/panoramix
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import csv
+from datetime import datetime
 import gzip
 import json
 from subprocess import Popen
@@ -8,7 +9,7 @@ from subprocess import Popen
 from flask.ext.script import Manager
 from flask.ext.migrate import MigrateCommand
 from panoramix import db
-from sqlalchemy import Column, Integer, String, Table
+from sqlalchemy import Column, Integer, String, Table, DateTime
 
 from panoramix import app
 from panoramix import models
@@ -59,7 +60,7 @@ def load_examples(sample):
         Column("year", Integer),
         Column("name", String(128)),
         Column("num", Integer),
-        Column("ds", String(20)),
+        Column("ds", DateTime),
         Column("gender", String(10)),
         Column("sum_boys", Integer),
         Column("sum_girls", Integer),
@@ -78,7 +79,7 @@ def load_examples(sample):
                 continue
             if num == "NA":
                 num = 0
-            ds = str(year) + '-01-01'
+            ds = datetime(int(year), 1, 1)
             db.engine.execute(
                 BirthNames.insert(),
                 state=state,
diff --git a/panoramix/forms.py b/panoramix/forms.py
index b38821e8fd00f..0a0b45d66cda6 100644
--- a/panoramix/forms.py
+++ b/panoramix/forms.py
@@ -1,6 +1,7 @@
 from wtforms import (
     Field, Form, SelectMultipleField, SelectField, TextField, TextAreaField,
     BooleanField, IntegerField, HiddenField)
+from wtforms import validators
 from copy import copy
 from panoramix import app
 config = app.config
@@ -58,7 +59,7 @@ def __init__(self, viz):
                 description="One or many metrics to display"),
             'groupby': SelectMultipleField(
                 'Group by',
-                choices=[(s, s) for s in datasource.groupby_column_names],
+                choices=self.choicify(datasource.groupby_column_names),
                 description="One or many fields to group by"),
             'granularity': TextField(
                 'Time Granularity', default="one day",
@@ -75,19 +76,24 @@ def __init__(self, viz):
                 SelectField(
                     'Row limit',
                     default=config.get("ROW_LIMIT"),
-                    choices=[(s, s) for s in self.row_limits]),
+                    choices=self.choicify(self.row_limits)),
             'limit':
                 SelectField(
-                    'Series limit', choices=[(s, s) for s in self.series_limits],
+                    'Series limit',
+                    choices=self.choicify(self.series_limits),
                     default=50,
                     description=(
                         "Limits the number of time series that get displayed")),
             'rolling_type': SelectField(
                 'Rolling',
+                default='mean',
                 choices=[(s, s) for s in ['mean', 'sum', 'std']],
                 description=(
                     "Defines a rolling window function to apply")),
-            'rolling_periods': TextField('Periods', description=(
+            'rolling_periods': IntegerField(
+                'Periods',
+                validators=[validators.optional()],
+                description=(
                 "Defines the size of the rolling window function, "
                 "relative to the 'granularity' field")),
             'series': SelectField(
@@ -118,7 +124,7 @@ def __init__(self, viz):
                 description="Suffix to apply after the percentage display"),
             'markup_type': SelectField(
                 "Markup Type",
-                choices=[(s, s) for s in ['markdown', 'html']],
+                choices=self.choicify(['markdown', 'html']),
                 default="markdown",
                 description="Pick your favorite markup language"),
             'rotation': SelectField(
@@ -128,9 +134,9 @@ def __init__(self, viz):
                 description="Rotation to apply to words in the cloud"),
             'line_interpolation': SelectField(
                 "Line Interpolation",
-                choices=[(s, s) for s in [
+                choices=self.choicify([
                     'linear', 'basis', 'cardinal', 'monotone',
-                    'step-before', 'step-after']],
+                    'step-before', 'step-after']),
                 default='linear',
                 description="Line interpolation as defined by d3.js"),
             'code': TextAreaField("Code", description="Put your code here"),
@@ -168,11 +174,24 @@ def __init__(self, viz):
                 description="Compute the contribution to the total"),
             'num_period_compare': IntegerField(
                 "Period Ratio", default=None,
+                validators=[validators.optional()],
                 description=(
-                    "Number of period to compare against, "
+                    "[integer] Number of periods to compare against, "
                     "this is relative to the granularity selected")),
+            'time_compare': TextField(
+                "Time Shift Compare",
+                default="1 week ago",
+                description=(
+                    "Overlay a timeseries from a "
+                    "relative time period. Expects relative time delta "
+                    "in natural language (example: 24 hours, 7 days, "
+                    "56 weeks, 365 days)")),
         }
 
+    @staticmethod
+    def choicify(l):  # each item -> 2-tuple of its "{}".format() string
+        return [("{}".format(obj), "{}".format(obj)) for obj in l]
+
     def get_form(self, previous=False):
         px_form_fields = self.field_dict
         viz = self.viz
diff --git a/panoramix/models.py b/panoramix/models.py
index 91dcd7a0ffccb..3c3a24b4878b5 100644
--- a/panoramix/models.py
+++ b/panoramix/models.py
@@ -327,6 +327,7 @@ def query(
             filter=None,
             is_timeseries=True,
             timeseries_limit=15, row_limit=None,
+            inner_from_dttm=None, inner_to_dttm=None,
             extras=None):
 
         qry_start_dttm = datetime.now()
@@ -363,10 +364,17 @@ def query(
         from_clause = table(self.table_name)
         qry = qry.group_by(*groupby_exprs)
 
-        where_clause_and = [
+        time_filter = [
             timestamp >= from_dttm.isoformat(),
-            timestamp < to_dttm.isoformat(),
+            timestamp <= to_dttm.isoformat(),
         ]
+        inner_time_filter = copy(time_filter)
+        if inner_from_dttm:
+            inner_time_filter[0] = timestamp >= inner_from_dttm.isoformat()
+        if inner_to_dttm:
+            inner_time_filter[1] = timestamp <= inner_to_dttm.isoformat()
+
+        where_clause_and = []
         for col, op, eq in filter:
             if op in ('in', 'not in'):
                 values = eq.split(",")
@@ -376,14 +384,14 @@ def query(
                 where_clause_and.append(cond)
         if extras and 'where' in extras:
             where_clause_and += [text(extras['where'])]
-        qry = qry.where(and_(*where_clause_and))
+        qry = qry.where(and_(*(time_filter + where_clause_and)))
         qry = qry.order_by(desc(main_metric_expr))
         qry = qry.limit(row_limit)
 
         if timeseries_limit and groupby:
             subq = select(inner_groupby_exprs)
             subq = subq.select_from(table(self.table_name))
-            subq = subq.where(and_(*where_clause_and))
+            subq = subq.where(and_(*(where_clause_and + inner_time_filter)))
             subq = subq.group_by(*inner_groupby_exprs)
             subq = subq.order_by(desc(main_metric_expr))
             subq = subq.limit(timeseries_limit)
@@ -677,9 +685,13 @@ def query(
             is_timeseries=True,
             timeseries_limit=None,
             row_limit=None,
+            inner_from_dttm=None, inner_to_dttm=None,
             extras=None):
         qry_start_dttm = datetime.now()
 
+        inner_from_dttm = inner_from_dttm or from_dttm
+        inner_to_dttm = inner_to_dttm or to_dttm
+
         # add tzinfo to native datetime with config
         from_dttm = from_dttm.replace(tzinfo=config.get("DRUID_TZ"))
         to_dttm = to_dttm.replace(tzinfo=config.get("DRUID_TZ"))
@@ -738,6 +750,7 @@ def query(
             pre_qry['limit_spec'] = {
                 "type": "default",
                 "limit": timeseries_limit,
+                'intervals': inner_from_dttm.isoformat() + '/' + inner_to_dttm.isoformat(),
                 "columns": [{
                     "dimension": metrics[0] if metrics else self.metrics[0],
                     "direction": "descending",
diff --git a/panoramix/static/widgets/viz_nvd3.js b/panoramix/static/widgets/viz_nvd3.js
index 14dbd9922147f..2d7e4547f5436 100644
--- a/panoramix/static/widgets/viz_nvd3.js
+++ b/panoramix/static/widgets/viz_nvd3.js
@@ -50,7 +50,9 @@ function viz_nvd3(token_name, json_callback) {
           chart.yAxis.tickFormat(d3.format('.3s'));
           if (viz.form_data.contribution || viz.form_data.num_period_compare) {
             chart.yAxis.tickFormat(d3.format('.3p'));
-            chart.y2Axis.tickFormat(d3.format('.3p'));
+            if (chart.y2Axis != undefined) {
+                chart.y2Axis.tickFormat(d3.format('.3p'));
+            }
           }
 
         } else if (viz_type === 'dist_bar') {
diff --git a/panoramix/templates/panoramix/datasource.html b/panoramix/templates/panoramix/datasource.html
index 7beb01c0f9f83..cedb23deaaf48 100644
--- a/panoramix/templates/panoramix/datasource.html
+++ b/panoramix/templates/panoramix/datasource.html
@@ -98,6 +98,9 @@ <h3>{{ viz.verbose_name }}
           data-toggle="modal" data-target="#query_modal">query</span>
       </h3>
       <hr/>
+          {% block messages %}
+          {% endblock %}
+            {% include 'appbuilder/flash.html' %}
       <div class="viz" style="height: 700px;">
         {% block viz_html %}
         {% if viz.error_msg %}
diff --git a/panoramix/viz.py b/panoramix/viz.py
index 631578fc5f2f3..45006cc51ba6a 100644
--- a/panoramix/viz.py
+++ b/panoramix/viz.py
@@ -1,4 +1,5 @@
 from collections import OrderedDict, defaultdict
+from copy import copy
 from datetime import datetime
 import json
 import uuid
@@ -40,6 +41,11 @@ def __init__(self, datasource, form_data):
             form = form_class(form_data)
         else:
             form = form_class(**form_data)
+        if not form.validate():
+            for k, v in form.errors.items():
+                if not k.startswith("flt") and not form_data.get('token'):
+                    flash("{}: {}".format(k, " ".join(v)), 'danger')
+
         data = form.data.copy()
         previous_viz_type = form_data.get('previous_viz_type')
         if previous_viz_type in viz_types and previous_viz_type != self.viz_type:
@@ -85,11 +91,14 @@ def get_url(self, **kwargs):
             '{self.datasource.id}/'.format(**locals()))
         return href(d)
 
-    def get_df(self):
+    def get_df(self, query_obj=None):
+        if not query_obj:
+            query_obj = self.query_obj()
+
         self.error_msg = ""
         self.results = None
 
-        self.results = self.bake_query()
+        self.results = self.datasource.query(**query_obj)
         df = self.results.df
         if df is None or df.empty:
             raise Exception("No data, review your incantations!")
@@ -118,9 +127,6 @@ def query_filters(self):
                 filters.append((col, op, eq))
         return filters
 
-    def bake_query(self):
-        return self.datasource.query(**self.query_obj())
-
     def query_obj(self):
         """
         Building a query object
@@ -260,7 +266,10 @@ class NVD3Viz(BaseViz):
         'nv.d3.min.js',
         'widgets/viz_nvd3.js',
     ]
-    css_files = ['nv.d3.css']
+    css_files = [
+        'nv.d3.css',
+        'widgets/viz_nvd3.css',
+    ]
 
 
 class BubbleViz(NVD3Viz):
@@ -387,34 +396,36 @@ class NVD3TimeSeriesViz(NVD3Viz):
         'metrics',
         'groupby', 'limit',
         ('rolling_type', 'rolling_periods'),
-        ('num_period_compare', 'line_interpolation'),
+        ('time_compare', 'num_period_compare'),
+        ('line_interpolation', None),
         ('show_brush', 'show_legend'),
         ('rich_tooltip', 'y_axis_zero'),
-        ('y_log_scale', 'contribution')
+        ('y_log_scale', 'contribution'),
     ]
 
-    def get_df(self):
+    def get_df(self, query_obj=None):
         form_data = self.form_data
-        df = super(NVD3TimeSeriesViz, self).get_df()
+        df = super(NVD3TimeSeriesViz, self).get_df(query_obj)
+
         df = df.fillna(0)
         if form_data.get("granularity") == "all":
             raise Exception("Pick a time granularity for your time series")
 
         df = df.pivot_table(
             index="timestamp",
-            columns=self.form_data.get('groupby'),
-            values=self.form_data.get('metrics'))
+            columns=form_data.get('groupby'),
+            values=form_data.get('metrics'))
 
         if self.sort_series:
             dfs = df.sum()
             dfs.sort(ascending=False)
             df = df[dfs.index]
 
-        if self.form_data.get("contribution") == "y":
+        if form_data.get("contribution"):
             dft = df.T
             df = (dft / dft.sum()).T
 
-        num_period_compare = self.form_data.get("num_period_compare")
+        num_period_compare = form_data.get("num_period_compare")
         if num_period_compare:
             num_period_compare = int(num_period_compare)
             df = df / df.shift(num_period_compare)
@@ -431,8 +442,7 @@ def get_df(self):
                 df = pd.rolling_sum(df, int(rolling_periods))
         return df
 
-    def get_json_data(self):
-        df = self.get_df()
+    def to_series(self, df, classed='', title_suffix=''):
         series = df.to_dict('series')
         chart_data = []
         for name in df.T.index.tolist():
@@ -448,14 +458,39 @@ def get_json_data(self):
                     series_title = ", ".join(name)
                 else:
                     series_title = ", ".join(name[1:])
+            color = utils.color(series_title)
+            if title_suffix:
+                series_title += title_suffix
+
             d = {
                 "key": series_title,
-                "color": utils.color(series_title),
+                "color": color,
+                "classed": classed,
                 "values": [
                     {'x': ds, 'y': ys[i]}
                     for i, ds in enumerate(df.timestamp)]
             }
             chart_data.append(d)
+        return chart_data
+
+    def get_json_data(self):
+        df = self.get_df()
+        chart_data = self.to_series(df)
+
+        time_compare = self.form_data.get('time_compare')
+        if time_compare:
+            query_object = self.query_obj()
+            delta = utils.parse_human_timedelta(time_compare)
+            query_object['inner_from_dttm'] = query_object['from_dttm']
+            query_object['inner_to_dttm'] = query_object['to_dttm']
+            query_object['from_dttm'] -= delta
+            query_object['to_dttm'] -= delta
+            df2 = self.get_df(query_object)
+            df2.index += delta
+            chart_data += self.to_series(
+                df2, classed='dashed', title_suffix="---")
+            chart_data = sorted(chart_data, key=lambda x: x['key'])
+
         data = {
             'chart_data': chart_data,
             'query': self.results.query,