def get_data(self, df: pd.DataFrame) -> "VizData":
    """
    Build the pie chart records from a query result.

    Every row of ``df`` becomes one record: ``x`` is the label built from the
    ``self.groupby`` columns and ``y`` is the value of the first metric in
    ``self.metric_labels``. Records are ordered by ``y``, largest first.

    :param df: query result holding the groupby columns and the metric column
    :return: list of ``{"x": label, "y": value}`` dicts, or ``None`` when
        ``df`` is empty
    """

    def _format_label(label: object) -> str:
        """Render a single groupby value as text."""
        if isinstance(label, str):
            return label
        # ``pd.isna`` recognises None, NaN, ``pd.NA`` and ``pd.NaT`` alike, so
        # nullable and datetime columns get the same placeholder as plain
        # object/float columns. The scalar guard keeps list-like values (for
        # which ``pd.isna`` returns an array) on the ``str()`` path below.
        if pd.api.types.is_scalar(label) and pd.isna(label):
            return NULL_STRING
        # A column holding nulls is upcast to float; show 2.0 as "2".
        if isinstance(label, float) and label.is_integer():
            return str(int(label))
        return str(label)

    def _label_aggfunc(labels: pd.Series) -> str:
        """
        Convert a single or multi column label into a single label, replacing
        null values with `NULL_STRING` and joining multiple columns together
        with a comma. Examples:

        >>> _label_aggfunc(pd.Series(["abc"]))
        'abc'
        >>> _label_aggfunc(pd.Series([1]))
        '1'
        >>> _label_aggfunc(pd.Series(["abc", "def"]))
        'abc, def'
        >>> # note: integer floats are stripped of decimal digits
        >>> _label_aggfunc(pd.Series([0.1, 2.0, 0.3]))
        '0.1, 2, 0.3'
        """
        return ", ".join(_format_label(label) for label in labels)

    if df.empty:
        return None

    metric = self.metric_labels[0]
    df = pd.DataFrame(
        {"x": df[self.groupby].agg(func=_label_aggfunc, axis=1), "y": df[metric]}
    )
    df.sort_values(by="y", ascending=False, inplace=True)
    return df.to_dict(orient="records")
class TestDistributionPieViz(SupersetTestCase):
    """Check the ``x`` labels produced by ``viz.DistributionPieViz.get_data``."""

    # Every column holds exactly one null so each test also covers the
    # null-label case for its groupby type.
    base_df = pd.DataFrame(
        data={
            "intcol": [1, 2, 3, 4, None],
            "floatcol": [1.0, 0.2, 0.3, 0.4, None],
            "strcol_a": ["a", "a", "a", "a", None],
            "strcol": ["a", "b", "c", None, "d"],
        }
    )

    @staticmethod
    def get_cols(data: List[Dict[str, Any]]) -> Set[str]:
        """Return the distinct ``x`` labels found in the records of ``data``."""
        return {row["x"] for row in data}

    def test_bool_groupby(self):
        """Boolean groupby values are rendered via ``str``; null gets the placeholder."""
        datasource = self.get_datasource_mock()
        df = pd.DataFrame(data={"intcol": [1, 2, None], "boolcol": [True, None, False]})

        pie_viz = viz.DistributionPieViz(
            datasource, {"metrics": ["intcol"], "groupby": ["boolcol"]},
        )
        data = pie_viz.get_data(df)
        assert self.get_cols(data) == {"True", "False", ""}

    def test_string_groupby(self):
        """String groupby values are used unchanged as labels."""
        datasource = self.get_datasource_mock()
        pie_viz = viz.DistributionPieViz(
            datasource, {"metrics": ["floatcol"], "groupby": ["strcol"]},
        )
        data = pie_viz.get_data(self.base_df)
        assert self.get_cols(data) == {"", "a", "b", "c", "d"}

    def test_int_groupby(self):
        """Integer values in a column containing a null are labelled without decimals."""
        datasource = self.get_datasource_mock()
        pie_viz = viz.DistributionPieViz(
            datasource, {"metrics": ["floatcol"], "groupby": ["intcol"]},
        )
        data = pie_viz.get_data(self.base_df)
        assert self.get_cols(data) == {"", "1", "2", "3", "4"}

    def test_float_groupby(self):
        """Whole-number floats lose their decimals; other floats keep them."""
        datasource = self.get_datasource_mock()
        pie_viz = viz.DistributionPieViz(
            datasource, {"metrics": ["intcol"], "groupby": ["floatcol"]},
        )
        data = pie_viz.get_data(self.base_df)
        assert self.get_cols(data) == {"", "1", "0.2", "0.3", "0.4"}

    def test_multi_groupby(self):
        """Multiple groupby columns are joined into one label with ", "."""
        datasource = self.get_datasource_mock()
        pie_viz = viz.DistributionPieViz(
            datasource, {"metrics": ["floatcol"], "groupby": ["intcol", "strcol"]},
        )
        data = pie_viz.get_data(self.base_df)
        assert self.get_cols(data) == {"1, a", "2, b", "3, c", "4, ", ", d"}