Skip to content

Commit

Permalink
table: Ensure correct dtype in _compute_distributions
Browse files Browse the repository at this point in the history
Fix a 'ValueError: cannot convert float NaN to integer' in bincount
when the column data comes from an object array and contains NaN values.
  • Loading branch information
ales-erjavec committed Jan 27, 2017
1 parent 53b0321 commit e5c1dc3
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
2 changes: 2 additions & 0 deletions Orange/data/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1309,6 +1309,8 @@ def _get_matrix(M, cachedM, col):
m, W, Xcsc = _get_matrix(self.X, Xcsc, col)
elif col < 0:
m, W, Xcsc = _get_matrix(self.metas, Xcsc, col * (-1) - 1)
if np.issubdtype(m.dtype, np.dtype(object)):
m = m.astype(float)
else:
m, W, Ycsc = _get_matrix(self._Y, Ycsc, col - self.X.shape[1])
if var.is_discrete:
Expand Down
11 changes: 9 additions & 2 deletions Orange/tests/test_distribution.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Test methods with long descriptive names can omit docstrings
# pylint: disable=missing-docstring
# Test internal methods
# pylint: disable=missing-docstring, protected-access

import unittest
from unittest.mock import Mock
Expand Down Expand Up @@ -430,7 +431,13 @@ def test_compute_distributions_metas(self):
variable = d.domain[-2]
dist, _ = d._compute_distributions([variable])[0]
np.testing.assert_almost_equal(dist, [3, 3, 2])

# repeat with nan values
assert d.metas.dtype.kind == "O"
assert d.metas[0, 1] == 0
d.metas[0, 1] = np.nan
dist, nanc = d._compute_distributions([variable])[0]
np.testing.assert_almost_equal(dist, [2, 3, 2])
self.assertEqual(nanc, 1)

if __name__ == "__main__":
unittest.main()

0 comments on commit e5c1dc3

Please sign in to comment.