From 11a87505d420dc87db0beb4711c73732d0ef36eb Mon Sep 17 00:00:00 2001 From: Marko Toplak Date: Mon, 27 May 2019 12:47:16 +0200 Subject: [PATCH] updated test for correctness of scipy.stats.mode --- Orange/statistics/util.py | 6 ++++- Orange/tests/test_statistics.py | 26 +++++++++++++--------- Orange/widgets/data/owfeaturestatistics.py | 2 +- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/Orange/statistics/util.py b/Orange/statistics/util.py index ad82e0d5e32..a5715645598 100644 --- a/Orange/statistics/util.py +++ b/Orange/statistics/util.py @@ -479,7 +479,11 @@ def nanmedian_sparse(x): def nanmode(x, axis=0): - """ A temporary replacement for a buggy scipy.stats.stats.mode from scipy < 1.2.0""" + """ A temporary replacement for a scipy.stats.mode. + + This function returns mode NaN if all values are NaN (scipy<1.2.0 wrongly + returns zero). Also, this function returns count NaN if all values are NaN + (scipy=1.3.0 returns some number).""" nans = np.isnan(np.array(x)).sum(axis=axis, keepdims=True) == x.shape[axis] res = scipy.stats.stats.mode(x, axis) return scipy.stats.stats.ModeResult(np.where(nans, np.nan, res.mode), diff --git a/Orange/tests/test_statistics.py b/Orange/tests/test_statistics.py index f6acdfd4224..d5442d789fc 100644 --- a/Orange/tests/test_statistics.py +++ b/Orange/tests/test_statistics.py @@ -645,14 +645,18 @@ def test_nanunique_ignores_nans_in_counts(self, array): np.testing.assert_equal(nanunique(x, return_counts=True)[1], expected) -class TestNanModeAppVeyor(unittest.TestCase): - def test_appveyour_still_not_onscipy_1_2_0(self): - import scipy - from distutils.version import StrictVersion - import os - - if os.getenv("APPVEYOR") and \ - StrictVersion(scipy.__version__) >= StrictVersion("1.2.0"): - self.fail("Appveyor now uses Scipy 1.2.0; revert changes in " - "the last three commits (bde2cbe, 7163448, ab0f31d) " - "of gh-3480. 
Then, remove this test.") +class TestNanModeFixedInScipy(unittest.TestCase): + + @unittest.expectedFailure + def test_scipy_nanmode_still_wrong(self): + import scipy.stats + X = np.array([[np.nan, np.nan, 1, 1], + [2, np.nan, 1, 1]]) + mode, count = scipy.stats.mode(X, 0) + np.testing.assert_array_equal(mode, [[2, np.nan, 1, 1]]) + np.testing.assert_array_equal(count, [[1, np.nan, 2, 2]]) + mode, count = scipy.stats.mode(X, 1) + np.testing.assert_array_equal(mode, [[1], [1]]) + np.testing.assert_array_equal(count, [[2], [2]]) + # When Scipy's scipy.stats.mode works correctly, remove Orange.statistics.util.nanmode + # and this test. Also update requirements. diff --git a/Orange/widgets/data/owfeaturestatistics.py b/Orange/widgets/data/owfeaturestatistics.py index 9acebe76bb5..178ffa662d2 100644 --- a/Orange/widgets/data/owfeaturestatistics.py +++ b/Orange/widgets/data/owfeaturestatistics.py @@ -252,7 +252,7 @@ def __mode(x, *args, **kwargs): if sp.issparse(x): x = x.todense(order="C") # return ss.mode(x, *args, **kwargs)[0] - return ut.nanmode(x, *args, **kwargs)[0] # Temporary replacement for scipy < 1.2.0 + return ut.nanmode(x, *args, **kwargs)[0] # Temporary replacement for scipy self._center = self.__compute_stat( matrices,