From 4abee66efc9768671c1ca23a9ecd2115822341e5 Mon Sep 17 00:00:00 2001
From: Cristian Garcia <cgarcia.e88@gmail.com>
Date: Thu, 13 Jan 2022 10:32:15 -0500
Subject: [PATCH] FIX-#440: Replace infs with nans to avoid crash when creating
 a heatmap (#442)

* FIX-#440: replace infs with nans

Signed-off-by: Cristian Garcia <cgarcia.e88@gmail.com>

* Change test name

Co-authored-by: Doris Lee <dorisjunglinlee@gmail.com>

* remove ignore format option on PandasExecutor.py

Co-authored-by: Doris Lee <dorisjunglinlee@gmail.com>
---
 lux/executor/PandasExecutor.py |  6 ++++++
 tests/test_pandas.py           | 19 +++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/lux/executor/PandasExecutor.py b/lux/executor/PandasExecutor.py
index 30dfdf4c..8d2314cf 100644
--- a/lux/executor/PandasExecutor.py
+++ b/lux/executor/PandasExecutor.py
@@ -281,6 +281,8 @@ def execute_binning(ldf: LuxDataFrame, vis: Vis):
         """
         import numpy as np
 
+        vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)
+
         bin_attribute = [x for x in vis._inferred_intent if x.bin_size != 0][0]
         bin_attr = bin_attribute.attribute
         series = vis.data[bin_attr]
@@ -379,6 +381,10 @@ def execute_2D_binning(vis: Vis) -> None:
         ----------
         vis : Vis
         """
+        import numpy as np
+
+        vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)
+
         pd.reset_option("mode.chained_assignment")
         with pd.option_context("mode.chained_assignment", None):
             x_attr = vis.get_attr_by_channel("x")[0].attribute
diff --git a/tests/test_pandas.py b/tests/test_pandas.py
index 6f3dea8c..6bc23bb0 100644
--- a/tests/test_pandas.py
+++ b/tests/test_pandas.py
@@ -15,6 +15,7 @@
 from .context import lux
 import pytest
 import pandas as pd
+import numpy as np
 
 
 def test_head_tail(global_var):
@@ -54,3 +55,21 @@ def test_convert_dtype(global_var):
     cdf = df.convert_dtypes()
     cdf._ipython_display_()
     assert list(cdf.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"]
+
+
+def test_infs():  # regression test for #440: inf values in data to be binned
+    nrows = 100_000
+
+    # continuous columns: uniform samples in [0, 1) with one +inf injected into each
+    c1 = np.random.uniform(0, 1, size=nrows)
+    c1[2] = np.inf
+    c2 = np.random.uniform(0, 1, size=nrows)
+    c2[3] = np.inf
+
+    # discrete columns: random integers drawn from {0, 1}
+    d1 = np.random.randint(0, 2, size=nrows)
+    d2 = np.random.randint(0, 2, size=nrows)
+
+    df = pd.DataFrame({"c1": c1, "c2": c2, "d1": d1, "d2": d2})
+
+    df._ipython_display_()  # no assertion: the test passes if this call does not raise