From 4abee66efc9768671c1ca23a9ecd2115822341e5 Mon Sep 17 00:00:00 2001 From: Cristian Garcia Date: Thu, 13 Jan 2022 10:32:15 -0500 Subject: [PATCH] FIX-#440: Replace infs with nans to avoid crash when creating a heatmap (#442) * FIX-#440: replace infs with nans Signed-off-by: Cristian Garcia * Change test name Co-authored-by: Doris Lee * remove ignore format option on PandasExecutor.py Co-authored-by: Doris Lee --- lux/executor/PandasExecutor.py | 6 ++++++ tests/test_pandas.py | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py index 30dfdf4c..8d2314cf 100644 --- a/lux/executor/PandasExecutor.py +++ b/lux/executor/PandasExecutor.py @@ -281,6 +281,8 @@ def execute_binning(ldf: LuxDataFrame, vis: Vis): """ import numpy as np + vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan) + bin_attribute = [x for x in vis._inferred_intent if x.bin_size != 0][0] bin_attr = bin_attribute.attribute series = vis.data[bin_attr] @@ -379,6 +381,10 @@ def execute_2D_binning(vis: Vis) -> None: ---------- vis : Vis """ + import numpy as np + + vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan) + pd.reset_option("mode.chained_assignment") with pd.option_context("mode.chained_assignment", None): x_attr = vis.get_attr_by_channel("x")[0].attribute diff --git a/tests/test_pandas.py b/tests/test_pandas.py index 6f3dea8c..6bc23bb0 100644 --- a/tests/test_pandas.py +++ b/tests/test_pandas.py @@ -15,6 +15,7 @@ from .context import lux import pytest import pandas as pd +import numpy as np def test_head_tail(global_var): @@ -54,3 +55,21 @@ def test_convert_dtype(global_var): cdf = df.convert_dtypes() cdf._ipython_display_() assert list(cdf.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"] + + +def test_infs(): + nrows = 100_000 + + # continuous + c1 = np.random.uniform(0, 1, size=nrows) + c1[2] = np.inf + c2 = np.random.uniform(0, 1, size=nrows) + c2[3] = np.inf + + # discrete + d1 = np.random.randint(0, 2, size=nrows) + d2 = np.random.randint(0, 2, size=nrows) + + df = pd.DataFrame({"c1": c1, "c2": c2, "d1": d1, "d2": d2}) + + df._ipython_display_()