From e89fc5150d8e94ab29f1d6bf188cbacfa8206033 Mon Sep 17 00:00:00 2001 From: Maxime Beauchemin Date: Mon, 2 Apr 2018 21:48:14 -0700 Subject: [PATCH] [bugfix] convert metrics to numeric in dataframe (#4726) * [bugfix] convert metrics to numeric in dataframe It appears that the DB-API driver and pandas' read_sql sometimes fail to return the proper numeric types for metrics, so they show up as `object` in the dataframe. This results in "No numeric types to aggregate" errors when trying to perform aggregations or pivoting in pandas. This PR looks for metrics in dataframes that are typed as "object" and uses pandas' to_numeric to convert them. * Fix tests * Remove all iteritems --- superset/models/core.py | 2 +- superset/viz.py | 15 ++++++++++++--- tests/viz_tests.py | 2 ++ 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/superset/models/core.py b/superset/models/core.py index c32ac5c229635..6eef48c688ccc 100644 --- a/superset/models/core.py +++ b/superset/models/core.py @@ -702,7 +702,7 @@ def needs_conversion(df_series): return True return False - for k, v in df.dtypes.iteritems(): + for k, v in df.dtypes.items(): if v.type == numpy.object_ and needs_conversion(df[k]): df[k] = df[k].apply(utils.json_dumps_w_dates) return df diff --git a/superset/viz.py b/superset/viz.py index 5a92cd3c59c17..cb20e1181ffbe 100644 --- a/superset/viz.py +++ b/superset/viz.py @@ -170,11 +170,21 @@ def get_df(self, query_obj=None): if self.datasource.offset: df[DTTM_ALIAS] += timedelta(hours=self.datasource.offset) df[DTTM_ALIAS] += self.time_shift + + self.df_metrics_to_num(df, query_obj.get('metrics') or []) + df.replace([np.inf, -np.inf], np.nan) fillna = self.get_fillna_for_columns(df.columns) df = df.fillna(fillna) return df + @staticmethod + def df_metrics_to_num(df, metrics): + """Converting metrics to numeric when pandas.read_sql cannot""" + for col, dtype in df.dtypes.items(): + if dtype.type == np.object_ and col in metrics: + df[col] = pd.to_numeric(df[col]) + def
query_obj(self): """Building a query object""" form_data = self.form_data @@ -1060,7 +1070,6 @@ def process_data(self, df, aggregate=False): df = df.fillna(0) if fd.get('granularity') == 'all': raise Exception(_('Pick a time granularity for your time series')) - if not aggregate: df = df.pivot_table( index=DTTM_ALIAS, @@ -1384,7 +1393,7 @@ def get_data(self, df): pt = (pt / pt.sum()).T pt = pt.reindex(row.index) chart_data = [] - for name, ys in pt.iteritems(): + for name, ys in pt.items(): if pt[name].dtype.kind not in 'biufc' or name in self.groupby: continue if isinstance(name, string_types): @@ -1395,7 +1404,7 @@ def get_data(self, df): l = [str(s) for s in name[1:]] # noqa: E741 series_title = ', '.join(l) values = [] - for i, v in ys.iteritems(): + for i, v in ys.items(): x = i if isinstance(x, (tuple, list)): x = ', '.join([text_type(s) for s in x]) diff --git a/tests/viz_tests.py b/tests/viz_tests.py index 6822837e28312..a5adfc1f2540f 100644 --- a/tests/viz_tests.py +++ b/tests/viz_tests.py @@ -77,6 +77,8 @@ def test_get_df_handles_dttm_col(self): results.df.empty = False datasource.query = Mock(return_value=results) test_viz = viz.BaseViz(datasource, form_data) + + test_viz.df_metrics_to_num = Mock() test_viz.get_fillna_for_columns = Mock(return_value=0) test_viz.get_df(query_obj) mock_call = df.__setitem__.mock_calls[0]