Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/master' into Rt05
Browse files Browse the repository at this point in the history
* upstream/master:
  fix ci failures (pandas-dev#25225)
  STY: use pytest.raises context manager (indexes/period) (pandas-dev#25199)
  DOC: exclude autogenerated c/cpp/html files from 'trailing whitespace' checks (pandas-dev#24549)
  DOC: Fixes to docstrings and add PR10 (space before colon) to validation (pandas-dev#25109)
  REF: Remove many Panel tests (pandas-dev#25191)
  BUG: Fix Series.is_unique with single occurrence of NaN (pandas-dev#25182)
  • Loading branch information
thoo committed Feb 8, 2019
2 parents b55b270 + 1d1b14c commit ba4f001
Show file tree
Hide file tree
Showing 31 changed files with 212 additions and 940 deletions.
15 changes: 12 additions & 3 deletions ci/code_checks.sh
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
# this particular codebase (e.g. src/headers, src/klib, src/msgpack). However,
# we can lint all header files since they aren't "generated" like C files are.
MSG='Linting .c and .h' ; echo $MSG
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime
cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util
RET=$(($RET + $?)) ; echo $MSG "DONE"

echo "isort --version-number"
Expand Down Expand Up @@ -174,9 +174,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
MSG='Check that no file in the repo contains trailing whitespaces' ; echo $MSG
set -o pipefail
if [[ "$AZURE" == "true" ]]; then
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Trailing whitespaces found: " $3}'
else
! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Trailing whitespaces found: " $3}'
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"
fi
Expand Down Expand Up @@ -240,8 +241,16 @@ fi
### DOCSTRINGS ###
if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then

MSG='Validate docstrings (GL06, GL07, GL09, SS04, SS05, PR03, PR05, PR10, EX04, RT04, RT05, SA05)' ; echo $MSG
$BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,SS05,PR03,PR05,PR10,EX04,RT04,RT05,SA05
RET=$(($RET + $?)) ; echo $MSG "DONE"

fi
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.24.2.rst
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ Bug Fixes

**Other**

-
- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`)
-
-

Expand Down
2 changes: 1 addition & 1 deletion pandas/core/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1365,7 +1365,7 @@ def is_unique(self):
-------
is_unique : boolean
"""
return self.nunique() == len(self)
return self.nunique(dropna=False) == len(self)

@property
def is_monotonic(self):
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -282,8 +282,8 @@ def __doc__(self):
Note: partial matches are supported for convenience, but unless you use the
full option name (e.g. x.y.z.option_name), your code may break in future
versions if new options with similar names are introduced.
value :
new value of option.
value : object
New value of option.
Returns
-------
Expand Down
14 changes: 0 additions & 14 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,20 +268,6 @@ def _slice(self, slicer):
""" return a slice of my values """
return self.values[slicer]

def reshape_nd(self, labels, shape, ref_items):
"""
Parameters
----------
labels : list of new axis labels
shape : new shape
ref_items : new ref_items
return a new block that is transformed to a nd block
"""
return _block2d_to_blocknd(values=self.get_values().T,
placement=self.mgr_locs, shape=shape,
labels=labels, ref_items=ref_items)

def getitem_block(self, slicer, new_mgr_locs=None):
"""
Perform __getitem__-like, return result as block.
Expand Down
4 changes: 0 additions & 4 deletions pandas/core/internals/managers.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,10 +584,6 @@ def comp(s, regex=False):
bm._consolidate_inplace()
return bm

def reshape_nd(self, axes, **kwargs):
""" a 2d-nd reshape operation on a BlockManager """
return self.apply('reshape_nd', axes=axes, **kwargs)

def is_consolidated(self):
"""
Return True if more than one block with the same dtype
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/excel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1003,7 +1003,7 @@ class ExcelWriter(object):
mode : {'w' or 'a'}, default 'w'
File mode to use (write or append).
.. versionadded:: 0.24.0
.. versionadded:: 0.24.0
Attributes
----------
Expand Down
19 changes: 0 additions & 19 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,7 +197,6 @@ class DuplicateWarning(Warning):
u'appendable_multiseries': 'AppendableMultiSeriesTable',
u'appendable_frame': 'AppendableFrameTable',
u'appendable_multiframe': 'AppendableMultiFrameTable',
u'appendable_panel': 'AppendablePanelTable',
u'worm': 'WORMTable',
u'legacy_frame': 'LegacyFrameTable',
u'legacy_panel': 'LegacyPanelTable',
Expand Down Expand Up @@ -4420,24 +4419,6 @@ def read(self, **kwargs):
return df


class AppendablePanelTable(AppendableTable):

""" suppor the new appendable table formats """
table_type = u'appendable_panel'
ndim = 3
obj_type = Panel

def get_object(self, obj):
""" these are written transposed """
if self.is_transposed:
obj = obj.transpose(*self.data_orientation)
return obj

@property
def is_transposed(self):
return self.data_orientation != tuple(range(self.ndim))


def _reindex_axis(obj, axis, labels, other=None):
ax = obj._get_axis(axis)
labels = ensure_index(labels)
Expand Down
2 changes: 1 addition & 1 deletion pandas/plotting/_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -2549,7 +2549,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
Parameters
----------
grouped : Grouped DataFrame
subplots :
subplots : bool
* ``False`` - no subplots will be used
* ``True`` - create a subplot for each group
column : column name or list of names, or vector
Expand Down
11 changes: 1 addition & 10 deletions pandas/tests/dtypes/test_missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from datetime import datetime
from decimal import Decimal
from warnings import catch_warnings, filterwarnings, simplefilter
from warnings import catch_warnings, filterwarnings

import numpy as np
import pytest
Expand Down Expand Up @@ -94,15 +94,6 @@ def test_isna_isnull(self, isna_f):
expected = df.apply(isna_f)
tm.assert_frame_equal(result, expected)

# panel
with catch_warnings(record=True):
simplefilter("ignore", FutureWarning)
for p in [tm.makePanel(), tm.makePeriodPanel(),
tm.add_nans(tm.makePanel())]:
result = isna_f(p)
expected = p.apply(isna_f)
tm.assert_panel_equal(result, expected)

def test_isna_lists(self):
result = isna([[False]])
exp = np.array([[False]])
Expand Down
8 changes: 0 additions & 8 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from pandas import DataFrame, Index, MultiIndex, Series, date_range
from pandas.core.computation.check import _NUMEXPR_INSTALLED
from pandas.tests.frame.common import TestData
import pandas.util.testing as tm
from pandas.util.testing import (
assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf)

Expand Down Expand Up @@ -355,13 +354,6 @@ def to_series(mi, level):
else:
raise AssertionError("object must be a Series or Index")

@pytest.mark.filterwarnings("ignore::FutureWarning")
def test_raise_on_panel_with_multiindex(self, parser, engine):
p = tm.makePanel(7)
p.items = tm.makeCustomIndex(len(p.items), nlevels=2)
with pytest.raises(NotImplementedError):
pd.eval('p + 1', parser=parser, engine=engine)


@td.skip_if_no_ne
class TestDataFrameQueryNumExprPandas(object):
Expand Down
20 changes: 0 additions & 20 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1218,26 +1218,6 @@ def test_groupby_nat_exclude():
grouped.get_group(pd.NaT)


@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
def test_sparse_friendly(df):
sdf = df[['C', 'D']].to_sparse()
panel = tm.makePanel()
tm.add_nans(panel)

def _check_work(gp):
gp.mean()
gp.agg(np.mean)
dict(iter(gp))

# it works!
_check_work(sdf.groupby(lambda x: x // 2))
_check_work(sdf['C'].groupby(lambda x: x // 2))
_check_work(sdf.groupby(df['A']))

# do this someday
# _check_work(panel.groupby(lambda x: x.month, axis=1))


def test_groupby_2d_malformed():
d = DataFrame(index=lrange(2))
d['group'] = ['g1', 'g2']
Expand Down
21 changes: 21 additions & 0 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -913,3 +913,24 @@ def test_astype_category(self, copy, name, ordered):
result = index.astype('category', copy=copy)
expected = CategoricalIndex(index.values, name=name)
tm.assert_index_equal(result, expected)

def test_is_unique(self):
# initialize a unique index
index = self.create_index().drop_duplicates()
assert index.is_unique is True

# empty index should be unique
index_empty = index[:0]
assert index_empty.is_unique is True

# test basic dupes
index_dup = index.insert(0, index[0])
assert index_dup.is_unique is False

# single NA should be unique
index_na = index.insert(0, np.nan)
assert index_na.is_unique is True

# multiple NA should not be unique
index_na_dup = index_na.insert(0, np.nan)
assert index_na_dup.is_unique is False
19 changes: 4 additions & 15 deletions pandas/tests/indexes/interval/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,12 +242,10 @@ def test_take(self, closed):
[0, 0, 1], [1, 1, 2], closed=closed)
tm.assert_index_equal(result, expected)

def test_unique(self, closed):
# unique non-overlapping
idx = IntervalIndex.from_tuples(
[(0, 1), (2, 3), (4, 5)], closed=closed)
assert idx.is_unique is True

def test_is_unique_interval(self, closed):
"""
Interval specific tests for is_unique in addition to base class tests
"""
# unique overlapping - distinct endpoints
idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed)
assert idx.is_unique is True
Expand All @@ -261,15 +259,6 @@ def test_unique(self, closed):
idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed)
assert idx.is_unique is True

# duplicate
idx = IntervalIndex.from_tuples(
[(0, 1), (0, 1), (2, 3)], closed=closed)
assert idx.is_unique is False

# empty
idx = IntervalIndex([], closed=closed)
assert idx.is_unique is True

def test_monotonic(self, closed):
# increasing non-overlapping
idx = IntervalIndex.from_tuples(
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/indexes/multi/test_duplicates.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,18 @@ def test_has_duplicates(idx, idx_dup):
assert mi.is_unique is False
assert mi.has_duplicates is True

# single instance of NaN
mi_nan = MultiIndex(levels=[['a', 'b'], [0, 1]],
codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]])
assert mi_nan.is_unique is True
assert mi_nan.has_duplicates is False

# multiple instances of NaN
mi_nan_dup = MultiIndex(levels=[['a', 'b'], [0, 1]],
codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]])
assert mi_nan_dup.is_unique is False
assert mi_nan_dup.has_duplicates is True


def test_has_duplicates_from_tuples():
# GH 9075
Expand Down
4 changes: 3 additions & 1 deletion pandas/tests/indexes/period/test_asfreq.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,9 @@ def test_asfreq(self):
assert pi7.asfreq('H', 'S') == pi5
assert pi7.asfreq('Min', 'S') == pi6

pytest.raises(ValueError, pi7.asfreq, 'T', 'foo')
msg = "How must be one of S or E"
with pytest.raises(ValueError, match=msg):
pi7.asfreq('T', 'foo')
result1 = pi1.asfreq('3M')
result2 = pi1.asfreq('M')
expected = period_range(freq='M', start='2001-12', end='2001-12')
Expand Down
Loading

0 comments on commit ba4f001

Please sign in to comment.