From ac88f8207d60184ae60de382c682232f71116981 Mon Sep 17 00:00:00 2001
From: Cristian Garcia
Date: Thu, 6 Jan 2022 17:21:34 -0500
Subject: [PATCH] FIX-#440: replace infs with nans

Signed-off-by: Cristian Garcia
---
 lux/executor/PandasExecutor.py |  7 +++++++
 tests/test_pandas.py           | 19 +++++++++++++++++++
 2 files changed, 26 insertions(+)

diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
index 30dfdf4c..5557c64a 100644
--- a/lux/executor/PandasExecutor.py
+++ b/lux/executor/PandasExecutor.py
@@ -11,6 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+# fmt: off
 
 import pandas as pd
 from lux.vis.VisList import VisList
@@ -281,6 +282,8 @@ def execute_binning(ldf: LuxDataFrame, vis: Vis):
         """
         import numpy as np
 
+        vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)
+
         bin_attribute = [x for x in vis._inferred_intent if x.bin_size != 0][0]
         bin_attr = bin_attribute.attribute
         series = vis.data[bin_attr]
@@ -379,6 +382,10 @@ def execute_2D_binning(vis: Vis) -> None:
         ----------
         vis : Vis
         """
+        import numpy as np
+
+        vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)
+
         pd.reset_option("mode.chained_assignment")
         with pd.option_context("mode.chained_assignment", None):
             x_attr = vis.get_attr_by_channel("x")[0].attribute
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index 6f3dea8c..585caad6 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -15,6 +15,7 @@
 from .context import lux
 import pytest
 import pandas as pd
+import numpy as np
 
 
 def test_head_tail(global_var):
@@ -54,3 +55,21 @@ def test_convert_dtype(global_var):
     cdf = df.convert_dtypes()
     cdf._ipython_display_()
     assert list(cdf.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"]
+
+
+def test_heatmap_with_nans():
+    nrows = 100_000
+
+    # continuous
+    c1 = np.random.uniform(0, 1, size=nrows)
+    c1[2] = np.inf
+    c2 = np.random.uniform(0, 1, size=nrows)
+    c2[3] = np.inf
+
+    # discrete
+    d1 = np.random.randint(0, 2, size=nrows)
+    d2 = np.random.randint(0, 2, size=nrows)
+
+    df = pd.DataFrame({"c1": c1, "c2": c2, "d1": d1, "d2": d2})
+
+    df._ipython_display_()