Skip to content

Commit

Permalink
FIX-#440: replace infs with nans
Browse files Browse the repository at this point in the history
Signed-off-by: Cristian Garcia <cgarcia.e88@gmail.com>
  • Loading branch information
cgarciae committed Jan 6, 2022
1 parent 91e8e98 commit 7df184e
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
7 changes: 7 additions & 0 deletions lux/executor/PandasExecutor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# fmt: off

import pandas as pd
from lux.vis.VisList import VisList
Expand Down Expand Up @@ -281,6 +282,8 @@ def execute_binning(ldf: LuxDataFrame, vis: Vis):
"""
import numpy as np

vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)

bin_attribute = [x for x in vis._inferred_intent if x.bin_size != 0][0]
bin_attr = bin_attribute.attribute
series = vis.data[bin_attr]
Expand Down Expand Up @@ -379,6 +382,10 @@ def execute_2D_binning(vis: Vis) -> None:
----------
vis : Vis
"""
import numpy as np

vis._vis_data = vis._vis_data.replace([np.inf, -np.inf], np.nan)

pd.reset_option("mode.chained_assignment")
with pd.option_context("mode.chained_assignment", None):
x_attr = vis.get_attr_by_channel("x")[0].attribute
Expand Down
18 changes: 18 additions & 0 deletions tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .context import lux
import pytest
import pandas as pd
import numpy as np


def test_head_tail(global_var):
Expand Down Expand Up @@ -54,3 +55,20 @@ def test_convert_dtype(global_var):
cdf = df.convert_dtypes()
cdf._ipython_display_()
assert list(cdf.recommendation.keys()) == ["Correlation", "Distribution", "Occurrence"]

def test_heatmap_with_nans():
    """Smoke-test displaying a large frame whose continuous columns hold infinities.

    Builds a 100,000-row DataFrame with two uniform float columns, each with
    a single ``np.inf`` planted in it, and two random 0/1 integer columns,
    then triggers the notebook display hook. There are no assertions: the
    test passes as long as the display call does not raise.

    NOTE(review): ``_ipython_display_`` is presumably supplied by the ``lux``
    import at the top of this module, and the row count is presumably chosen
    to exercise the heatmap path the test is named after -- confirm.
    """
    nrows = 100_000
    # Row index at which each continuous column receives its infinity.
    inf_positions = {"c1": 2, "c2": 3}

    columns = {}

    # continuous
    for col_name, inf_row in inf_positions.items():
        samples = np.random.uniform(0, 1, size=nrows)
        samples[inf_row] = np.inf
        columns[col_name] = samples

    # discrete
    for col_name in ("d1", "d2"):
        columns[col_name] = np.random.randint(0, 2, size=nrows)

    df = pd.DataFrame(columns)

    df._ipython_display_()

0 comments on commit 7df184e

Please sign in to comment.