diff --git a/ci/code_checks.sh b/ci/code_checks.sh index dc41382f4988e..e845018d46968 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -93,7 +93,7 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then # this particular codebase (e.g. src/headers, src/klib, src/msgpack). However, # we can lint all header files since they aren't "generated" like C files are. MSG='Linting .c and .h' ; echo $MSG - cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime + cpplint --quiet --extensions=c,h --headers=h --recursive --filter=-readability/casting,-runtime/int,-build/include_subdir pandas/_libs/src/*.h pandas/_libs/src/parser pandas/_libs/ujson pandas/_libs/tslibs/src/datetime pandas/io/msgpack pandas/_libs/*.cpp pandas/util RET=$(($RET + $?)) ; echo $MSG "DONE" echo "isort --version-number" @@ -174,9 +174,10 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then MSG='Check that no file in the repo contains tailing whitespaces' ; echo $MSG set -o pipefail if [[ "$AZURE" == "true" ]]; then - ! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}' + # we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files + ! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}' else - ! grep -n --exclude="*.svg" -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}' + ! grep -n '--exclude=*.'{svg,c,cpp,html} -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}' fi RET=$(($RET + $?)) ; echo $MSG "DONE" fi @@ -240,8 +241,16 @@ fi ### DOCSTRINGS ### if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then +<<<<<<< HEAD MSG='Validate docstrings (GL06, GL07, GL09, SS04, SS05, PR03, PR05, EX04, RT04, RT05, SA05)' ; echo $MSG $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,SS05,PR03,PR05,EX04,RT04,RT05,SA05 +||||||| merged common ancestors + MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, EX04, RT04, SS05, SA05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04,RT04,SS05,SA05 +======= + MSG='Validate docstrings (GL06, GL07, GL09, SS04, PR03, PR05, PR10, EX04, RT04, SS05, SA05)' ; echo $MSG + $BASE_DIR/scripts/validate_docstrings.py --format=azure --errors=GL06,GL07,GL09,SS04,PR03,PR05,EX04,RT04,SS05,SA05 +>>>>>>> upstream/master RET=$(($RET + $?)) ; echo $MSG "DONE" fi diff --git a/doc/source/whatsnew/v0.24.2.rst b/doc/source/whatsnew/v0.24.2.rst index 6a9a316da1ec6..73df504c89d5b 100644 --- a/doc/source/whatsnew/v0.24.2.rst +++ b/doc/source/whatsnew/v0.24.2.rst @@ -87,7 +87,7 @@ Bug Fixes **Other** -- +- Bug in :meth:`Series.is_unique` where single occurrences of ``NaN`` were not considered unique (:issue:`25180`) - - diff --git a/pandas/core/base.py b/pandas/core/base.py index 726266b39cdee..5a98e83c65884 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -1365,7 +1365,7 @@ def is_unique(self): ------- is_unique : boolean """ - return self.nunique() == len(self) + return self.nunique(dropna=False) == len(self) @property def is_monotonic(self): diff --git a/pandas/core/config.py b/pandas/core/config.py index 0f43ca65d187a..01664fffb1e27 100644 --- a/pandas/core/config.py +++ b/pandas/core/config.py @@ -282,8 +282,8 @@ def __doc__(self): Note: partial matches are supported for convenience, but unless you use the full option name (e.g. x.y.z.option_name), your code may break in future versions if new options with similar names are introduced. -value : - new value of option. +value : object + New value of option. Returns ------- diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 43798d64d1172..d966b31a22932 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -268,20 +268,6 @@ def _slice(self, slicer): """ return a slice of my values """ return self.values[slicer] - def reshape_nd(self, labels, shape, ref_items): - """ - Parameters - ---------- - labels : list of new axis labels - shape : new shape - ref_items : new ref_items - - return a new block that is transformed to a nd block - """ - return _block2d_to_blocknd(values=self.get_values().T, - placement=self.mgr_locs, shape=shape, - labels=labels, ref_items=ref_items) - def getitem_block(self, slicer, new_mgr_locs=None): """ Perform __getitem__-like, return result as block. diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 5cae6e1a89170..38b719db1709f 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -584,10 +584,6 @@ def comp(s, regex=False): bm._consolidate_inplace() return bm - def reshape_nd(self, axes, **kwargs): - """ a 2d-nd reshape operation on a BlockManager """ - return self.apply('reshape_nd', axes=axes, **kwargs) - def is_consolidated(self): """ Return True if more than one block with the same dtype diff --git a/pandas/io/excel.py b/pandas/io/excel.py index 11e5e78fa3e80..9e5e9f4f0d4f6 100644 --- a/pandas/io/excel.py +++ b/pandas/io/excel.py @@ -1003,7 +1003,7 @@ class ExcelWriter(object): mode : {'w' or 'a'}, default 'w' File mode to use (write or append). - .. versionadded:: 0.24.0 + .. versionadded:: 0.24.0 Attributes ---------- diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 2813d722246bd..5a96b3e2db563 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -197,7 +197,6 @@ class DuplicateWarning(Warning): u'appendable_multiseries': 'AppendableMultiSeriesTable', u'appendable_frame': 'AppendableFrameTable', u'appendable_multiframe': 'AppendableMultiFrameTable', - u'appendable_panel': 'AppendablePanelTable', u'worm': 'WORMTable', u'legacy_frame': 'LegacyFrameTable', u'legacy_panel': 'LegacyPanelTable', @@ -4420,24 +4419,6 @@ def read(self, **kwargs): return df -class AppendablePanelTable(AppendableTable): - - """ suppor the new appendable table formats """ - table_type = u'appendable_panel' - ndim = 3 - obj_type = Panel - - def get_object(self, obj): - """ these are written transposed """ - if self.is_transposed: - obj = obj.transpose(*self.data_orientation) - return obj - - @property - def is_transposed(self): - return self.data_orientation != tuple(range(self.ndim)) - - def _reindex_axis(obj, axis, labels, other=None): ax = obj._get_axis(axis) labels = ensure_index(labels) diff --git a/pandas/plotting/_core.py b/pandas/plotting/_core.py index 50e8ff8e580d5..ba647c42083b2 100644 --- a/pandas/plotting/_core.py +++ b/pandas/plotting/_core.py @@ -2549,7 +2549,7 @@ def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None, Parameters ---------- grouped : Grouped DataFrame - subplots : + subplots : bool * ``False`` - no subplots will be used * ``True`` - create a subplot for each group column : column name or list of names, or vector diff --git a/pandas/tests/dtypes/test_missing.py b/pandas/tests/dtypes/test_missing.py index d913d2ad299ce..7ca01e13a33a9 100644 --- a/pandas/tests/dtypes/test_missing.py +++ b/pandas/tests/dtypes/test_missing.py @@ -2,7 +2,7 @@ from datetime import datetime from decimal import Decimal -from warnings import catch_warnings, filterwarnings, simplefilter +from warnings import catch_warnings, filterwarnings import numpy as np import pytest @@ -94,15 +94,6 @@ def test_isna_isnull(self, isna_f): expected = df.apply(isna_f) tm.assert_frame_equal(result, expected) - # panel - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - for p in [tm.makePanel(), tm.makePeriodPanel(), - tm.add_nans(tm.makePanel())]: - result = isna_f(p) - expected = p.apply(isna_f) - tm.assert_panel_equal(result, expected) - def test_isna_lists(self): result = isna([[False]]) exp = np.array([[False]]) diff --git a/pandas/tests/frame/test_query_eval.py b/pandas/tests/frame/test_query_eval.py index 9c4d306ea5720..0d06d0006a9e2 100644 --- a/pandas/tests/frame/test_query_eval.py +++ b/pandas/tests/frame/test_query_eval.py @@ -14,7 +14,6 @@ from pandas import DataFrame, Index, MultiIndex, Series, date_range from pandas.core.computation.check import _NUMEXPR_INSTALLED from pandas.tests.frame.common import TestData -import pandas.util.testing as tm from pandas.util.testing import ( assert_frame_equal, assert_series_equal, makeCustomDataframe as mkdf) @@ -355,13 +354,6 @@ def to_series(mi, level): else: raise AssertionError("object must be a Series or Index") - @pytest.mark.filterwarnings("ignore::FutureWarning") - def test_raise_on_panel_with_multiindex(self, parser, engine): - p = tm.makePanel(7) - p.items = tm.makeCustomIndex(len(p.items), nlevels=2) - with pytest.raises(NotImplementedError): - pd.eval('p + 1', parser=parser, engine=engine) - @td.skip_if_no_ne class TestDataFrameQueryNumExprPandas(object): diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index f14c9dcdd8e42..1ae8efd2f6867 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1218,26 +1218,6 @@ def test_groupby_nat_exclude(): grouped.get_group(pd.NaT) -@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") -def test_sparse_friendly(df): - sdf = df[['C', 'D']].to_sparse() - panel = tm.makePanel() - tm.add_nans(panel) - - def _check_work(gp): - gp.mean() - gp.agg(np.mean) - dict(iter(gp)) - - # it works! - _check_work(sdf.groupby(lambda x: x // 2)) - _check_work(sdf['C'].groupby(lambda x: x // 2)) - _check_work(sdf.groupby(df['A'])) - - # do this someday - # _check_work(panel.groupby(lambda x: x.month, axis=1)) - - def test_groupby_2d_malformed(): d = DataFrame(index=lrange(2)) d['group'] = ['g1', 'g2'] diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index a838779689c44..6d29c147c4a4a 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -913,3 +913,24 @@ def test_astype_category(self, copy, name, ordered): result = index.astype('category', copy=copy) expected = CategoricalIndex(index.values, name=name) tm.assert_index_equal(result, expected) + + def test_is_unique(self): + # initialize a unique index + index = self.create_index().drop_duplicates() + assert index.is_unique is True + + # empty index should be unique + index_empty = index[:0] + assert index_empty.is_unique is True + + # test basic dupes + index_dup = index.insert(0, index[0]) + assert index_dup.is_unique is False + + # single NA should be unique + index_na = index.insert(0, np.nan) + assert index_na.is_unique is True + + # multiple NA should not be unique + index_na_dup = index_na.insert(0, np.nan) + assert index_na_dup.is_unique is False diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index f1fd06c9cab6e..e4f25ff143273 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -242,12 +242,10 @@ def test_take(self, closed): [0, 0, 1], [1, 1, 2], closed=closed) tm.assert_index_equal(result, expected) - def test_unique(self, closed): - # unique non-overlapping - idx = IntervalIndex.from_tuples( - [(0, 1), (2, 3), (4, 5)], closed=closed) - assert idx.is_unique is True - + def test_is_unique_interval(self, closed): + """ + Interval specific tests for is_unique in addition to base class tests + """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True @@ -261,15 +259,6 @@ def test_unique(self, closed): idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True - # duplicate - idx = IntervalIndex.from_tuples( - [(0, 1), (0, 1), (2, 3)], closed=closed) - assert idx.is_unique is False - - # empty - idx = IntervalIndex([], closed=closed) - assert idx.is_unique is True - def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples( diff --git a/pandas/tests/indexes/multi/test_duplicates.py b/pandas/tests/indexes/multi/test_duplicates.py index af15026de2b34..35034dc57b4b8 100644 --- a/pandas/tests/indexes/multi/test_duplicates.py +++ b/pandas/tests/indexes/multi/test_duplicates.py @@ -143,6 +143,18 @@ def test_has_duplicates(idx, idx_dup): assert mi.is_unique is False assert mi.has_duplicates is True + # single instance of NaN + mi_nan = MultiIndex(levels=[['a', 'b'], [0, 1]], + codes=[[-1, 0, 0, 1, 1], [-1, 0, 1, 0, 1]]) + assert mi_nan.is_unique is True + assert mi_nan.has_duplicates is False + + # multiple instances of NaN + mi_nan_dup = MultiIndex(levels=[['a', 'b'], [0, 1]], + codes=[[-1, -1, 0, 0, 1, 1], [-1, -1, 0, 1, 0, 1]]) + assert mi_nan_dup.is_unique is False + assert mi_nan_dup.has_duplicates is True + def test_has_duplicates_from_tuples(): # GH 9075 diff --git a/pandas/tests/indexes/period/test_asfreq.py b/pandas/tests/indexes/period/test_asfreq.py index 2dd49e7e0845e..30b416e3fe9dd 100644 --- a/pandas/tests/indexes/period/test_asfreq.py +++ b/pandas/tests/indexes/period/test_asfreq.py @@ -67,7 +67,9 @@ def test_asfreq(self): assert pi7.asfreq('H', 'S') == pi5 assert pi7.asfreq('Min', 'S') == pi6 - pytest.raises(ValueError, pi7.asfreq, 'T', 'foo') + msg = "How must be one of S or E" + with pytest.raises(ValueError, match=msg): + pi7.asfreq('T', 'foo') result1 = pi1.asfreq('3M') result2 = pi1.asfreq('M') expected = period_range(freq='M', start='2001-12', end='2001-12') diff --git a/pandas/tests/indexes/period/test_construction.py b/pandas/tests/indexes/period/test_construction.py index 916260c4cee7e..f1adeca7245f6 100644 --- a/pandas/tests/indexes/period/test_construction.py +++ b/pandas/tests/indexes/period/test_construction.py @@ -1,6 +1,7 @@ import numpy as np import pytest +from pandas._libs.tslibs.period import IncompatibleFrequency from pandas.compat import PY3, lmap, lrange, text_type from pandas.core.dtypes.dtypes import PeriodDtype @@ -66,12 +67,17 @@ def test_constructor_field_arrays(self): years = [2007, 2007, 2007] months = [1, 2] - pytest.raises(ValueError, PeriodIndex, year=years, month=months, - freq='M') - pytest.raises(ValueError, PeriodIndex, year=years, month=months, - freq='2M') - pytest.raises(ValueError, PeriodIndex, year=years, month=months, - freq='M', start=Period('2007-01', freq='M')) + + msg = "Mismatched Period array lengths" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq='M') + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq='2M') + + msg = "Can either instantiate from fields or endpoints, but not both" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=years, month=months, freq='M', + start=Period('2007-01', freq='M')) years = [2007, 2007, 2007] months = [1, 2, 3] @@ -81,8 +87,8 @@ def test_constructor_field_arrays(self): def test_constructor_U(self): # U was used as undefined period - pytest.raises(ValueError, period_range, '2007-1-1', periods=500, - freq='X') + with pytest.raises(ValueError, match="Invalid frequency: X"): + period_range('2007-1-1', periods=500, freq='X') def test_constructor_nano(self): idx = period_range(start=Period(ordinal=1, freq='N'), @@ -103,17 +109,29 @@ def test_constructor_arrays_negative_year(self): tm.assert_index_equal(pindex.quarter, pd.Index(quarters)) def test_constructor_invalid_quarters(self): - pytest.raises(ValueError, PeriodIndex, year=lrange(2000, 2004), - quarter=lrange(4), freq='Q-DEC') + msg = "Quarter must be 1 <= q <= 4" + with pytest.raises(ValueError, match=msg): + PeriodIndex(year=lrange(2000, 2004), quarter=lrange(4), + freq='Q-DEC') def test_constructor_corner(self): - pytest.raises(ValueError, PeriodIndex, periods=10, freq='A') + msg = "Not enough parameters to construct Period range" + with pytest.raises(ValueError, match=msg): + PeriodIndex(periods=10, freq='A') start = Period('2007', freq='A-JUN') end = Period('2010', freq='A-DEC') - pytest.raises(ValueError, PeriodIndex, start=start, end=end) - pytest.raises(ValueError, PeriodIndex, start=start) - pytest.raises(ValueError, PeriodIndex, end=end) + + msg = "start and end must have same freq" + with pytest.raises(ValueError, match=msg): + PeriodIndex(start=start, end=end) + + msg = ("Of the three parameters: start, end, and periods, exactly two" + " must be specified") + with pytest.raises(ValueError, match=msg): + PeriodIndex(start=start) + with pytest.raises(ValueError, match=msg): + PeriodIndex(end=end) result = period_range('2007-01', periods=10.5, freq='M') exp = period_range('2007-01', periods=10, freq='M') @@ -126,10 +144,15 @@ def test_constructor_fromarraylike(self): tm.assert_index_equal(PeriodIndex(idx.values), idx) tm.assert_index_equal(PeriodIndex(list(idx.values)), idx) - pytest.raises(ValueError, PeriodIndex, idx._ndarray_values) - pytest.raises(ValueError, PeriodIndex, list(idx._ndarray_values)) - pytest.raises(TypeError, PeriodIndex, - data=Period('2007', freq='A')) + msg = "freq not specified and cannot be inferred" + with pytest.raises(ValueError, match=msg): + PeriodIndex(idx._ndarray_values) + with pytest.raises(ValueError, match=msg): + PeriodIndex(list(idx._ndarray_values)) + + msg = "'Period' object is not iterable" + with pytest.raises(TypeError, match=msg): + PeriodIndex(data=Period('2007', freq='A')) result = PeriodIndex(iter(idx)) tm.assert_index_equal(result, idx) @@ -160,7 +183,9 @@ def test_constructor_datetime64arr(self): vals = np.arange(100000, 100000 + 10000, 100, dtype=np.int64) vals = vals.view(np.dtype('M8[us]')) - pytest.raises(ValueError, PeriodIndex, vals, freq='D') + msg = r"Wrong dtype: datetime64\[us\]" + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals, freq='D') @pytest.mark.parametrize('box', [None, 'series', 'index']) def test_constructor_datetime64arr_ok(self, box): @@ -300,17 +325,20 @@ def test_constructor_simple_new_empty(self): @pytest.mark.parametrize('floats', [[1.1, 2.1], np.array([1.1, 2.1])]) def test_constructor_floats(self, floats): - with pytest.raises(TypeError): + msg = r"PeriodIndex\._simple_new does not accept floats" + with pytest.raises(TypeError, match=msg): pd.PeriodIndex._simple_new(floats, freq='M') - with pytest.raises(TypeError): + msg = "PeriodIndex does not allow floating point in construction" + with pytest.raises(TypeError, match=msg): pd.PeriodIndex(floats, freq='M') def test_constructor_nat(self): - pytest.raises(ValueError, period_range, start='NaT', - end='2011-01-01', freq='M') - pytest.raises(ValueError, period_range, start='2011-01-01', - end='NaT', freq='M') + msg = "start and end must not be NaT" + with pytest.raises(ValueError, match=msg): + period_range(start='NaT', end='2011-01-01', freq='M') + with pytest.raises(ValueError, match=msg): + period_range(start='2011-01-01', end='NaT', freq='M') def test_constructor_year_and_quarter(self): year = pd.Series([2001, 2002, 2003]) @@ -455,9 +483,12 @@ def test_constructor(self): # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - pytest.raises(ValueError, PeriodIndex, vals) + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) vals = np.array(vals) - pytest.raises(ValueError, PeriodIndex, vals) + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) def test_constructor_error(self): start = Period('02-Apr-2005', 'B') @@ -508,7 +539,8 @@ def setup_method(self, method): self.series = Series(period_range('2000-01-01', periods=10, freq='D')) def test_constructor_cant_cast_period(self): - with pytest.raises(TypeError): + msg = "Cannot cast PeriodArray to dtype float64" + with pytest.raises(TypeError, match=msg): Series(period_range('2000-01-01', periods=10, freq='D'), dtype=float) diff --git a/pandas/tests/indexes/period/test_indexing.py b/pandas/tests/indexes/period/test_indexing.py index 47c2edfd13395..fa8199b4e6163 100644 --- a/pandas/tests/indexes/period/test_indexing.py +++ b/pandas/tests/indexes/period/test_indexing.py @@ -84,7 +84,8 @@ def test_getitem_partial(self): rng = period_range('2007-01', periods=50, freq='M') ts = Series(np.random.randn(len(rng)), rng) - pytest.raises(KeyError, ts.__getitem__, '2006') + with pytest.raises(KeyError, match=r"^'2006'$"): + ts['2006'] result = ts['2008'] assert (result.index.year == 2008).all() @@ -326,7 +327,8 @@ def test_take_fill_value(self): with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) - with pytest.raises(IndexError): + msg = "index -5 is out of bounds for size 3" + with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5])) @@ -335,7 +337,8 @@ class TestIndexing(object): def test_get_loc_msg(self): idx = period_range('2000-1-1', freq='A', periods=10) bad_period = Period('2012', 'A') - pytest.raises(KeyError, idx.get_loc, bad_period) + with pytest.raises(KeyError, match=r"^Period\('2012', 'A-DEC'\)$"): + idx.get_loc(bad_period) try: idx.get_loc(bad_period) @@ -373,8 +376,13 @@ def test_get_loc(self): msg = "Cannot interpret 'foo' as period" with pytest.raises(KeyError, match=msg): idx0.get_loc('foo') - pytest.raises(KeyError, idx0.get_loc, 1.1) - pytest.raises(TypeError, idx0.get_loc, idx0) + with pytest.raises(KeyError, match=r"^1\.1$"): + idx0.get_loc(1.1) + + msg = (r"'PeriodIndex\(\['2017-09-01', '2017-09-02', '2017-09-03'\]," + r" dtype='period\[D\]', freq='D'\)' is an invalid key") + with pytest.raises(TypeError, match=msg): + idx0.get_loc(idx0) # get the location of p1/p2 from # monotonic increasing PeriodIndex with duplicate @@ -391,8 +399,13 @@ def test_get_loc(self): with pytest.raises(KeyError, match=msg): idx1.get_loc('foo') - pytest.raises(KeyError, idx1.get_loc, 1.1) - pytest.raises(TypeError, idx1.get_loc, idx1) + with pytest.raises(KeyError, match=r"^1\.1$"): + idx1.get_loc(1.1) + + msg = (r"'PeriodIndex\(\['2017-09-02', '2017-09-02', '2017-09-03'\]," + r" dtype='period\[D\]', freq='D'\)' is an invalid key") + with pytest.raises(TypeError, match=msg): + idx1.get_loc(idx1) # get the location of p1/p2 from # non-monotonic increasing/decreasing PeriodIndex with duplicate @@ -441,18 +454,6 @@ def test_is_monotonic_decreasing(self): assert idx_dec1.is_monotonic_decreasing is True assert idx.is_monotonic_decreasing is False - def test_is_unique(self): - # GH 17717 - p0 = pd.Period('2017-09-01') - p1 = pd.Period('2017-09-02') - p2 = pd.Period('2017-09-03') - - idx0 = pd.PeriodIndex([p0, p1, p2]) - assert idx0.is_unique is True - - idx1 = pd.PeriodIndex([p1, p1, p2]) - assert idx1.is_unique is False - def test_contains(self): # GH 17717 p0 = pd.Period('2017-09-01') @@ -581,7 +582,7 @@ def test_get_loc2(self): msg = 'Input has different freq=None from PeriodArray\\(freq=D\\)' with pytest.raises(ValueError, match=msg): idx.get_loc('2000-01-10', method='nearest', tolerance='1 hour') - with pytest.raises(KeyError): + with pytest.raises(KeyError, match=r"^Period\('2000-01-10', 'D'\)$"): idx.get_loc('2000-01-10', method='nearest', tolerance='1 day') with pytest.raises( ValueError, diff --git a/pandas/tests/indexes/period/test_period.py b/pandas/tests/indexes/period/test_period.py index dc9a32d75d272..89bcf56dbda71 100644 --- a/pandas/tests/indexes/period/test_period.py +++ b/pandas/tests/indexes/period/test_period.py @@ -71,10 +71,12 @@ def test_fillna_period(self): pd.Period('2011-01-01', freq='D')), exp) def test_no_millisecond_field(self): - with pytest.raises(AttributeError): + msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): DatetimeIndex.millisecond - with pytest.raises(AttributeError): + msg = "'DatetimeIndex' object has no attribute 'millisecond'" + with pytest.raises(AttributeError, match=msg): DatetimeIndex([]).millisecond @pytest.mark.parametrize("sort", [None, False]) @@ -98,8 +100,8 @@ def test_difference_freq(self, sort): def test_hash_error(self): index = period_range('20010101', periods=10) - with pytest.raises(TypeError, match=("unhashable type: %r" % - type(index).__name__)): + msg = "unhashable type: '{}'".format(type(index).__name__) + with pytest.raises(TypeError, match=msg): hash(index) def test_make_time_series(self): @@ -124,7 +126,8 @@ def test_shallow_copy_i8(self): def test_shallow_copy_changing_freq_raises(self): pi = period_range("2018-01-01", periods=3, freq="2D") - with pytest.raises(IncompatibleFrequency, match="are different"): + msg = "specified freq and dtype are different" + with pytest.raises(IncompatibleFrequency, match=msg): pi._shallow_copy(pi, freq="H") def test_dtype_str(self): @@ -214,21 +217,17 @@ def test_period_index_length(self): assert (i1 == i2).all() assert i1.freq == i2.freq - try: + msg = "start and end must have same freq" + with pytest.raises(ValueError, match=msg): period_range(start=start, end=end_intv) - raise AssertionError('Cannot allow mixed freq for start and end') - except ValueError: - pass end_intv = Period('2005-05-01', 'B') i1 = period_range(start=start, end=end_intv) - try: + msg = ("Of the three parameters: start, end, and periods, exactly two" + " must be specified") + with pytest.raises(ValueError, match=msg): period_range(start=start) - raise AssertionError( - 'Must specify periods if missing start or end') - except ValueError: - pass # infer freq from first element i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')]) @@ -241,9 +240,12 @@ def test_period_index_length(self): # Mixed freq should fail vals = [end_intv, Period('2006-12-31', 'w')] - pytest.raises(ValueError, PeriodIndex, vals) + msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" + with pytest.raises(IncompatibleFrequency, match=msg): + PeriodIndex(vals) vals = np.array(vals) - pytest.raises(ValueError, PeriodIndex, vals) + with pytest.raises(ValueError, match=msg): + PeriodIndex(vals) def test_fields(self): # year, month, day, hour, minute @@ -381,7 +383,9 @@ def test_contains_nat(self): assert np.nan in idx def test_periods_number_check(self): - with pytest.raises(ValueError): + msg = ("Of the three parameters: start, end, and periods, exactly two" + " must be specified") + with pytest.raises(ValueError, match=msg): period_range('2011-1-1', '2012-1-1', 'B') def test_start_time(self): @@ -500,7 +504,8 @@ def test_is_full(self): assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq='A') - pytest.raises(ValueError, getattr, index, 'is_full') + with pytest.raises(ValueError, match="Index is not monotonic"): + index.is_full assert index[:0].is_full @@ -574,5 +579,6 @@ def test_maybe_convert_timedelta(): assert pi._maybe_convert_timedelta(2) == 2 offset = offsets.BusinessDay() - with pytest.raises(ValueError, match='freq'): + msg = r"Input has different freq=B from PeriodIndex\(freq=D\)" + with pytest.raises(ValueError, match=msg): pi._maybe_convert_timedelta(offset) diff --git a/pandas/tests/indexes/test_category.py b/pandas/tests/indexes/test_category.py index 582d466c6178e..d889135160ae2 100644 --- a/pandas/tests/indexes/test_category.py +++ b/pandas/tests/indexes/test_category.py @@ -611,15 +611,6 @@ def test_is_monotonic(self, data, non_lexsorted_data): assert c.is_monotonic_increasing is True assert c.is_monotonic_decreasing is False - @pytest.mark.parametrize('values, expected', [ - ([1, 2, 3], True), - ([1, 3, 1], False), - (list('abc'), True), - (list('aba'), False)]) - def test_is_unique(self, values, expected): - ci = CategoricalIndex(values) - assert ci.is_unique is expected - def test_has_duplicates(self): idx = CategoricalIndex([0, 0, 0], name='foo') diff --git a/pandas/tests/indexing/test_chaining_and_caching.py b/pandas/tests/indexing/test_chaining_and_caching.py index be0d9c5cf24ca..6070edca075c2 100644 --- a/pandas/tests/indexing/test_chaining_and_caching.py +++ b/pandas/tests/indexing/test_chaining_and_caching.py @@ -357,7 +357,6 @@ def check(result, expected): check(result4, expected) @pytest.mark.filterwarnings("ignore::DeprecationWarning") - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_cache_updating(self): # GH 4939, make sure to update the cache on setitem @@ -367,12 +366,6 @@ def test_cache_updating(self): assert "Hello Friend" in df['A'].index assert "Hello Friend" in df['B'].index - panel = tm.makePanel() - panel.ix[0] # get first item into cache - panel.ix[:, :, 'A+1'] = panel.ix[:, :, 'A'] + 1 - assert "A+1" in panel.ix[0].columns - assert "A+1" in panel.ix[1].columns - # 10264 df = DataFrame(np.zeros((5, 5), dtype='int64'), columns=[ 'a', 'b', 'c', 'd', 'e'], index=range(5)) diff --git a/pandas/tests/io/test_excel.py b/pandas/tests/io/test_excel.py index 717e9bc23c6b1..8c92db734168b 100644 --- a/pandas/tests/io/test_excel.py +++ b/pandas/tests/io/test_excel.py @@ -5,7 +5,6 @@ from functools import partial import os import warnings -from warnings import catch_warnings import numpy as np from numpy import nan @@ -2382,15 +2381,12 @@ def check_called(func): assert isinstance(writer, DummyClass) df = tm.makeCustomDataframe(1, 1) - with catch_warnings(record=True): - panel = tm.makePanel() - func = lambda: df.to_excel('something.test') - check_called(func) - check_called(lambda: panel.to_excel('something.test')) - check_called(lambda: df.to_excel('something.xlsx')) - check_called( - lambda: df.to_excel( - 'something.xls', engine='dummy')) + func = lambda: df.to_excel('something.test') + check_called(func) + check_called(lambda: df.to_excel('something.xlsx')) + check_called( + lambda: df.to_excel( + 'something.xls', engine='dummy')) @pytest.mark.parametrize('engine', [ diff --git a/pandas/tests/io/test_pytables.py b/pandas/tests/io/test_pytables.py index c339c33751b5f..4a806b178c6ee 100644 --- a/pandas/tests/io/test_pytables.py +++ b/pandas/tests/io/test_pytables.py @@ -19,11 +19,11 @@ import pandas as pd from pandas import ( Categorical, DataFrame, DatetimeIndex, Index, Int64Index, MultiIndex, - Panel, RangeIndex, Series, Timestamp, bdate_range, compat, concat, - date_range, isna, timedelta_range) + RangeIndex, Series, Timestamp, bdate_range, compat, concat, date_range, + isna, timedelta_range) import pandas.util.testing as tm from pandas.util.testing import ( - assert_frame_equal, assert_panel_equal, assert_series_equal, set_timezone) + assert_frame_equal, assert_series_equal, set_timezone) from pandas.io import pytables as pytables # noqa:E402 from pandas.io.formats.printing import pprint_thing @@ -185,11 +185,6 @@ def roundtrip(key, obj, **kwargs): o = tm.makeDataFrame() assert_frame_equal(o, roundtrip('frame', o)) - with catch_warnings(record=True): - - o = tm.makePanel() - assert_panel_equal(o, roundtrip('panel', o)) - # table df = DataFrame(dict(A=lrange(5), B=lrange(5))) df.to_hdf(path, 'table', append=True) @@ -348,11 +343,9 @@ def test_keys(self): store['a'] = tm.makeTimeSeries() store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() - with catch_warnings(record=True): - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() - assert len(store) == 5 - expected = {'/a', '/b', '/c', '/d', '/foo/bar'} + + assert len(store) == 3 + expected = {'/a', '/b', '/c'} assert set(store.keys()) == expected assert set(store) == expected @@ -388,11 +381,6 @@ def test_repr(self): store['b'] = tm.makeStringSeries() store['c'] = tm.makeDataFrame() - with catch_warnings(record=True): - store['d'] = tm.makePanel() - store['foo/bar'] = tm.makePanel() - store.append('e', tm.makePanel()) - df = tm.makeDataFrame() df['obj1'] = 'foo' df['obj2'] = 'bar' @@ -936,21 +924,6 @@ def test_append(self): store.append('/df3 foo', df[10:]) tm.assert_frame_equal(store['df3 foo'], df) - # panel - wp = tm.makePanel() - _maybe_remove(store, 'wp1') - store.append('wp1', wp.iloc[:, :10, :]) - store.append('wp1', wp.iloc[:, 10:, :]) - assert_panel_equal(store['wp1'], wp) - - # test using differt order of items on the non-index axes - _maybe_remove(store, 'wp1') - wp_append1 = wp.iloc[:, :10, :] - store.append('wp1', wp_append1) - wp_append2 = wp.iloc[:, 10:, :].reindex(items=wp.items[::-1]) - store.append('wp1', wp_append2) - assert_panel_equal(store['wp1'], wp) - # dtype issues - mizxed type in a single object column df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]]) df['mixed_column'] = 'testing' @@ -1254,22 +1227,6 @@ def test_append_all_nans(self): reloaded = read_hdf(path, 'df_with_missing') tm.assert_frame_equal(df_with_missing, reloaded) - matrix = [[[np.nan, np.nan, np.nan], [1, np.nan, np.nan]], - [[np.nan, np.nan, np.nan], [np.nan, 5, 6]], - [[np.nan, np.nan, np.nan], [np.nan, 3, np.nan]]] - - with catch_warnings(record=True): - panel_with_missing = Panel(matrix, - items=['Item1', 'Item2', 'Item3'], - major_axis=[1, 2], - minor_axis=['A', 'B', 'C']) - - with ensure_clean_path(self.path) as path: - panel_with_missing.to_hdf( - path, 'panel_with_missing', format='table') - reloaded_panel = read_hdf(path, 'panel_with_missing') - tm.assert_panel_equal(panel_with_missing, reloaded_panel) - def test_append_frame_column_oriented(self): with ensure_clean_store(self.path) as store: @@ -1342,40 +1299,11 @@ def test_append_with_strings(self): with ensure_clean_store(self.path) as store: with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - wp = tm.makePanel() - wp2 = wp.rename( - minor_axis={x: "%s_extra" % x for x in wp.minor_axis}) def check_col(key, name, size): assert getattr(store.get_storer(key) .table.description, name).itemsize == size - store.append('s1', wp, min_itemsize=20) - store.append('s1', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex( - minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s1'], expected) - check_col('s1', 'minor_axis', 20) - - # test dict format - store.append('s2', wp, min_itemsize={'minor_axis': 20}) - store.append('s2', wp2) - expected = concat([wp, wp2], axis=2) - expected = expected.reindex( - minor_axis=sorted(expected.minor_axis)) - assert_panel_equal(store['s2'], expected) - check_col('s2', 'minor_axis', 20) - - # apply the wrong field (similar to #1) - store.append('s3', wp, min_itemsize={'major_axis': 20}) - pytest.raises(ValueError, store.append, 's3', wp2) - - # test truncation of bigger strings - store.append('s4', wp) - pytest.raises(ValueError, store.append, 's4', wp2) - # avoid truncation on elements df = DataFrame([[123, 'asdqwerty'], [345, 'dggnhebbsdfbdfb']]) store.append('df_big', df) @@ -1674,32 +1602,6 @@ def check_col(key, name, size): (df_dc.string == 'foo')] tm.assert_frame_equal(result, expected) - with ensure_clean_store(self.path) as store: - with catch_warnings(record=True): - # panel - # GH5717 not handling data_columns - np.random.seed(1234) - p = tm.makePanel() - - store.append('p1', p) - tm.assert_panel_equal(store.select('p1'), p) - - store.append('p2', p, data_columns=True) - tm.assert_panel_equal(store.select('p2'), p) - - result = store.select('p2', where='ItemA>0') - expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - tm.assert_frame_equal(result.to_frame(), expected) - - result = store.select( - 'p2', where='ItemA>0 & minor_axis=["A","B"]') - expected = p.to_frame() - expected = expected[expected['ItemA'] > 0] - expected = expected[expected.reset_index( - level=['major']).index.isin(['A', 'B'])] - tm.assert_frame_equal(result.to_frame(), expected) - def test_create_table_index(self): with ensure_clean_store(self.path) as store: @@ -1708,37 +1610,6 @@ def test_create_table_index(self): def col(t, column): return getattr(store.get_storer(t).table.cols, column) - # index=False - wp = tm.makePanel() - store.append('p5', wp, index=False) - store.create_table_index('p5', columns=['major_axis']) - assert(col('p5', 'major_axis').is_indexed is True) - assert(col('p5', 'minor_axis').is_indexed is False) - - # index=True - store.append('p5i', wp, index=True) - assert(col('p5i', 'major_axis').is_indexed is True) - assert(col('p5i', 'minor_axis').is_indexed is True) - - # default optlevels - store.get_storer('p5').create_index() - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - - # let's change the indexing scheme - store.create_table_index('p5') - assert(col('p5', 'major_axis').index.optlevel == 6) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', optlevel=9) - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'medium') - store.create_table_index('p5', kind='full') - assert(col('p5', 'major_axis').index.optlevel == 9) - assert(col('p5', 'minor_axis').index.kind == 'full') - store.create_table_index('p5', optlevel=1, kind='light') - assert(col('p5', 'major_axis').index.optlevel == 1) - assert(col('p5', 'minor_axis').index.kind == 'light') - # data columns df = tm.makeTimeDataFrame() df['string'] = 'foo' @@ -1761,19 +1632,6 @@ def col(t, column): store.put('f2', df) pytest.raises(TypeError, store.create_table_index, 'f2') - def test_append_diff_item_order(self): - - with catch_warnings(record=True): - wp = tm.makePanel() - wp1 = wp.iloc[:, :10, :] - wp2 = wp.iloc[wp.items.get_indexer(['ItemC', 'ItemB', 'ItemA']), - 10:, :] - - with ensure_clean_store(self.path) as store: - store.put('panel', wp1, format='table') - pytest.raises(ValueError, store.put, 'panel', wp2, - append=True) - def test_append_hierarchical(self): index = MultiIndex(levels=[['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']], @@ -1987,10 +1845,6 @@ def check(obj, comparator): df['time2'] = Timestamp('20130102') check(df, tm.assert_frame_equal) - with catch_warnings(record=True): - p = tm.makePanel() - check(p, assert_panel_equal) - # empty frame, GH4273 with ensure_clean_store(self.path) as store: @@ -2011,24 +1865,6 @@ def check(obj, comparator): store.put('df2', df) assert_frame_equal(store.select('df2'), df) - with catch_warnings(record=True): - - # 0 len - p_empty = Panel(items=list('ABC')) - store.append('p', p_empty) - pytest.raises(KeyError, store.select, 'p') - - # repeated append of 0/non-zero frames - p = Panel(np.random.randn(3, 4, 5), items=list('ABC')) - store.append('p', p) - assert_panel_equal(store.select('p'), p) - store.append('p', p_empty) - assert_panel_equal(store.select('p'), p) - - # store - store.put('p2', p_empty) - assert_panel_equal(store.select('p2'), p_empty) - def test_append_raise(self): with ensure_clean_store(self.path) as store: @@ -2143,24 +1979,6 @@ def test_table_mixed_dtypes(self): store.append('df1_mixed', df) tm.assert_frame_equal(store.select('df1_mixed'), df) - with catch_warnings(record=True): - - # panel - wp = tm.makePanel() - wp['obj1'] = 'foo' - wp['obj2'] = 'bar' - wp['bool1'] = wp['ItemA'] > 0 - wp['bool2'] = wp['ItemB'] > 0 - wp['int1'] = 1 - wp['int2'] = 2 - wp = wp._consolidate() - - with catch_warnings(record=True): - - with ensure_clean_store(self.path) as store: - store.append('p1_mixed', wp) - assert_panel_equal(store.select('p1_mixed'), wp) - def test_unimplemented_dtypes_table_columns(self): with ensure_clean_store(self.path) as store: @@ -2308,193 +2126,6 @@ def test_remove(self): del store['b'] assert len(store) == 0 - def test_remove_where(self): - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - - # non-existance - crit1 = 'index>foo' - pytest.raises(KeyError, store.remove, 'a', [crit1]) - - # try to remove non-table (with crit) - # non-table ok (where = None) - wp = tm.makePanel(30) - store.put('wp', wp, format='table') - store.remove('wp', ["minor_axis=['A', 'D']"]) - rs = store.select('wp') - expected = wp.reindex(minor_axis=['B', 'C']) - assert_panel_equal(rs, expected) - - # empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - - # deleted number (entire table) - n = store.remove('wp', []) - assert n == 120 - - # non - empty where - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - pytest.raises(ValueError, store.remove, - 'wp', ['foo']) - - def test_remove_startstop(self): - # GH #4835 and #6177 - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - wp = tm.makePanel(30) - - # start - _maybe_remove(store, 'wp1') - store.put('wp1', wp, format='t') - n = store.remove('wp1', start=32) - assert n == 120 - 32 - result = store.select('wp1') - expected = wp.reindex(major_axis=wp.major_axis[:32 // 4]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='t') - n = store.remove('wp2', start=-32) - assert n == 32 - result = store.select('wp2') - expected = wp.reindex(major_axis=wp.major_axis[:-32 // 4]) - assert_panel_equal(result, expected) - - # stop - _maybe_remove(store, 'wp3') - store.put('wp3', wp, format='t') - n = store.remove('wp3', stop=32) - assert n == 32 - result = store.select('wp3') - expected = wp.reindex(major_axis=wp.major_axis[32 // 4:]) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='t') - n = store.remove('wp4', stop=-32) - assert n == 120 - 32 - result = store.select('wp4') - expected = wp.reindex(major_axis=wp.major_axis[-32 // 4:]) - assert_panel_equal(result, expected) - - # start n stop - _maybe_remove(store, 'wp5') - store.put('wp5', wp, format='t') - n = store.remove('wp5', start=16, stop=-16) - assert n == 120 - 32 - result = store.select('wp5') - expected = wp.reindex( - major_axis=(wp.major_axis[:16 // 4] - .union(wp.major_axis[-16 // 4:]))) - assert_panel_equal(result, expected) - - _maybe_remove(store, 'wp6') - store.put('wp6', wp, format='t') - n = store.remove('wp6', start=16, stop=16) - assert n == 0 - result = store.select('wp6') - expected = wp.reindex(major_axis=wp.major_axis) - assert_panel_equal(result, expected) - - # with where - _maybe_remove(store, 'wp7') - - # TODO: unused? - date = wp.major_axis.take(np.arange(0, 30, 3)) # noqa - - crit = 'major_axis=date' - store.put('wp7', wp, format='t') - n = store.remove('wp7', where=[crit], stop=80) - assert n == 28 - result = store.select('wp7') - expected = wp.reindex(major_axis=wp.major_axis.difference( - wp.major_axis[np.arange(0, 20, 3)])) - assert_panel_equal(result, expected) - - def test_remove_crit(self): - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - wp = tm.makePanel(30) - - # group row removal - _maybe_remove(store, 'wp3') - date4 = wp.major_axis.take([0, 1, 2, 4, 5, 6, 8, 9, 10]) - crit4 = 'major_axis=date4' - store.put('wp3', wp, format='t') - n = store.remove('wp3', where=[crit4]) - assert n == 36 - - result = store.select('wp3') - expected = wp.reindex( - major_axis=wp.major_axis.difference(date4)) - assert_panel_equal(result, expected) - - # upper half - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - date = wp.major_axis[len(wp.major_axis) // 2] - - crit1 = 'major_axis>date' - crit2 = "minor_axis=['A', 'D']" - n = store.remove('wp', where=[crit1]) - assert n == 56 - - n = store.remove('wp', where=[crit2]) - assert n == 32 - - result = store['wp'] - expected = wp.truncate(after=date).reindex(minor=['B', 'C']) - assert_panel_equal(result, expected) - - # individual row elements - _maybe_remove(store, 'wp2') - store.put('wp2', wp, format='table') - - date1 = wp.major_axis[1:3] - crit1 = 'major_axis=date1' - store.remove('wp2', where=[crit1]) - result = store.select('wp2') - expected = wp.reindex( - major_axis=wp.major_axis.difference(date1)) - assert_panel_equal(result, expected) - - date2 = wp.major_axis[5] - crit2 = 'major_axis=date2' - store.remove('wp2', where=[crit2]) - result = store['wp2'] - expected = wp.reindex( - major_axis=(wp.major_axis - .difference(date1) - .difference(Index([date2])) - )) - assert_panel_equal(result, expected) - - date3 = [wp.major_axis[7], wp.major_axis[9]] - crit3 = 'major_axis=date3' - store.remove('wp2', where=[crit3]) - result = store['wp2'] - expected = wp.reindex(major_axis=wp.major_axis - .difference(date1) - .difference(Index([date2])) - .difference(Index(date3))) - assert_panel_equal(result, expected) - - # corners - _maybe_remove(store, 'wp4') - store.put('wp4', wp, format='table') - n = store.remove( - 'wp4', where="major_axis>wp.major_axis[-1]") - result = store.select('wp4') - assert_panel_equal(result, wp) - def test_invalid_terms(self): with ensure_clean_store(self.path) as store: @@ -2504,27 +2135,16 @@ def test_invalid_terms(self): df = tm.makeTimeDataFrame() df['string'] = 'foo' df.loc[0:4, 'string'] = 'bar' - wp = tm.makePanel() store.put('df', df, format='table') - store.put('wp', wp, format='table') # some invalid terms - pytest.raises(ValueError, store.select, - 'wp', "minor=['A', 'B']") - pytest.raises(ValueError, store.select, - 'wp', ["index=['20121114']"]) - pytest.raises(ValueError, store.select, 'wp', [ - "index=['20121114', '20121114']"]) pytest.raises(TypeError, Term) # more invalid pytest.raises( ValueError, store.select, 'df', 'df.index[3]') pytest.raises(SyntaxError, store.select, 'df', 'index>') - pytest.raises( - ValueError, store.select, 'wp', - "major_axis<'20000108' & minor_axis['A', 'B']") # from the docs with ensure_clean_path(self.path) as path: @@ -2546,127 +2166,6 @@ def test_invalid_terms(self): pytest.raises(ValueError, read_hdf, path, 'dfq', where="A>0 or C>0") - def test_terms(self): - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - - wp = tm.makePanel() - wpneg = Panel.fromDict({-1: tm.makeDataFrame(), - 0: tm.makeDataFrame(), - 1: tm.makeDataFrame()}) - - store.put('wp', wp, format='table') - store.put('wpneg', wpneg, format='table') - - # panel - result = store.select( - 'wp', - "major_axis<'20000108' and minor_axis=['A', 'B']") - expected = wp.truncate( - after='20000108').reindex(minor=['A', 'B']) - assert_panel_equal(result, expected) - - # with deprecation - result = store.select( - 'wp', where=("major_axis<'20000108' " - "and minor_axis=['A', 'B']")) - expected = wp.truncate( - after='20000108').reindex(minor=['A', 'B']) - tm.assert_panel_equal(result, expected) - - with catch_warnings(record=True): - - # valid terms - terms = [('major_axis=20121114'), - ('major_axis>20121114'), - (("major_axis=['20121114', '20121114']"),), - ('major_axis=datetime.datetime(2012, 11, 14)'), - 'major_axis> 20121114', - 'major_axis >20121114', - 'major_axis > 20121114', - (("minor_axis=['A', 'B']"),), - (("minor_axis=['A', 'B']"),), - ((("minor_axis==['A', 'B']"),),), - (("items=['ItemA', 'ItemB']"),), - ('items=ItemA'), - ] - - for t in terms: - store.select('wp', t) - - with pytest.raises(TypeError, - match='Only named functions are supported'): - store.select( - 'wp', - 'major_axis == (lambda x: x)("20130101")') - - with catch_warnings(record=True): - # check USub node parsing - res = store.select('wpneg', 'items == -1') - expected = Panel({-1: wpneg[-1]}) - tm.assert_panel_equal(res, expected) - - msg = 'Unary addition not supported' - with pytest.raises(NotImplementedError, match=msg): - store.select('wpneg', 'items == +1') - - def test_term_compat(self): - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - wp = Panel(np.random.randn(2, 5, 4), items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - result = store.select( - 'wp', where=("major_axis>20000102 " - "and minor_axis=['A', 'B']")) - expected = wp.loc[:, wp.major_axis > - Timestamp('20000102'), ['A', 'B']] - assert_panel_equal(result, expected) - - store.remove('wp', 'major_axis>20000103') - result = store.select('wp') - expected = wp.loc[:, wp.major_axis <= Timestamp('20000103'), :] - assert_panel_equal(result, expected) - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - wp = Panel(np.random.randn(2, 5, 4), - items=['Item1', 'Item2'], - major_axis=date_range('1/1/2000', periods=5), - minor_axis=['A', 'B', 'C', 'D']) - store.append('wp', wp) - - # stringified datetimes - result = store.select( - 'wp', 'major_axis>datetime.datetime(2000, 1, 2)') - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - - result = store.select( - 'wp', 'major_axis>datetime.datetime(2000, 1, 2)') - expected = wp.loc[:, wp.major_axis > Timestamp('20000102')] - assert_panel_equal(result, expected) - - result = store.select( - 'wp', - "major_axis=[datetime.datetime(2000, 1, 2, 0, 0), " - "datetime.datetime(2000, 1, 3, 0, 0)]") - expected = wp.loc[:, [Timestamp('20000102'), - Timestamp('20000103')]] - assert_panel_equal(result, expected) - - result = store.select( - 'wp', "minor_axis=['A', 'B']") - expected = wp.loc[:, :, ['A', 'B']] - assert_panel_equal(result, expected) - def test_same_name_scoping(self): with ensure_clean_store(self.path) as store: @@ -2982,12 +2481,6 @@ def _make_one(): self._check_roundtrip(df1['int1'], tm.assert_series_equal, compression=compression) - def test_wide(self): - - with catch_warnings(record=True): - wp = tm.makePanel() - self._check_roundtrip(wp, assert_panel_equal) - @pytest.mark.filterwarnings( "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning" ) @@ -3096,34 +2589,6 @@ def test_select(self): with ensure_clean_store(self.path) as store: with catch_warnings(record=True): - wp = tm.makePanel() - - # put/select ok - _maybe_remove(store, 'wp') - store.put('wp', wp, format='table') - store.select('wp') - - # non-table ok (where = None) - _maybe_remove(store, 'wp') - store.put('wp2', wp) - store.select('wp2') - - # selection on the non-indexable with a large number of columns - wp = Panel(np.random.randn(100, 100, 100), - items=['Item%03d' % i for i in range(100)], - major_axis=date_range('1/1/2000', periods=100), - minor_axis=['E%03d' % i for i in range(100)]) - - _maybe_remove(store, 'wp') - store.append('wp', wp) - items = ['Item%03d' % i for i in range(80)] - result = store.select('wp', 'items=items') - expected = wp.reindex(items=items) - assert_panel_equal(expected, result) - - # selectin non-table with a where - # pytest.raises(ValueError, store.select, - # 'wp2', ('column', ['A', 'D'])) # select with columns= df = tm.makeTimeDataFrame() @@ -3652,31 +3117,6 @@ def test_retain_index_attributes2(self): assert read_hdf(path, 'data').index.name is None - def test_panel_select(self): - - with ensure_clean_store(self.path) as store: - - with catch_warnings(record=True): - - wp = tm.makePanel() - - store.put('wp', wp, format='table') - date = wp.major_axis[len(wp.major_axis) // 2] - - crit1 = ('major_axis>=date') - crit2 = ("minor_axis=['A', 'D']") - - result = store.select('wp', [crit1, crit2]) - expected = wp.truncate(before=date).reindex(minor=['A', 'D']) - assert_panel_equal(result, expected) - - result = store.select( - 'wp', ['major_axis>="20000124"', - ("minor_axis=['A', 'B']")]) - expected = wp.truncate( - before='20000124').reindex(minor=['A', 'B']) - assert_panel_equal(result, expected) - def test_frame_select(self): df = tm.makeTimeDataFrame() @@ -5300,35 +4740,30 @@ def test_complex_mixed_table(self): reread = read_hdf(path, 'df') assert_frame_equal(df, reread) - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_complex_across_dimensions_fixed(self): with catch_warnings(record=True): complex128 = np.array( [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) df = DataFrame({'A': s, 'B': s}) - p = Panel({'One': df, 'Two': df}) - objs = [s, df, p] - comps = [tm.assert_series_equal, tm.assert_frame_equal, - tm.assert_panel_equal] + objs = [s, df] + comps = [tm.assert_series_equal, tm.assert_frame_equal] for obj, comp in zip(objs, comps): with ensure_clean_path(self.path) as path: obj.to_hdf(path, 'obj', format='fixed') reread = read_hdf(path, 'obj') comp(obj, reread) - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_complex_across_dimensions(self): complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j]) s = Series(complex128, index=list('abcd')) df = DataFrame({'A': s, 'B': s}) with catch_warnings(record=True): - p = Panel({'One': df, 'Two': df}) - objs = [df, p] - comps = [tm.assert_frame_equal, tm.assert_panel_equal] + objs = [df] + comps = [tm.assert_frame_equal] for obj, comp in zip(objs, comps): with ensure_clean_path(self.path) as path: obj.to_hdf(path, 'obj', format='table') diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index e21f9d0291afa..5d7a9ab6f4cf0 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1,7 +1,5 @@ # pylint: disable=E1103 -from warnings import catch_warnings - import numpy as np from numpy.random import randn import pytest @@ -657,95 +655,6 @@ def test_join_dups(self): 'y_y', 'x_x', 'y_x', 'x_y', 'y_y'] assert_frame_equal(dta, expected) - def test_panel_join(self): - with catch_warnings(record=True): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.iloc[:2, :10, :3] - p2 = panel.iloc[2:, 5:, 2:] - - # left join - result = p1.join(p2) - expected = p1.copy() - expected['ItemC'] = p2['ItemC'] - tm.assert_panel_equal(result, expected) - - # right join - result = p1.join(p2, how='right') - expected = p2.copy() - expected['ItemA'] = p1['ItemA'] - expected['ItemB'] = p1['ItemB'] - expected = expected.reindex(items=['ItemA', 'ItemB', 'ItemC']) - tm.assert_panel_equal(result, expected) - - # inner join - result = p1.join(p2, how='inner') - expected = panel.iloc[:, 5:10, 2:3] - tm.assert_panel_equal(result, expected) - - # outer join - result = p1.join(p2, how='outer') - expected = p1.reindex(major=panel.major_axis, - minor=panel.minor_axis) - expected = expected.join(p2.reindex(major=panel.major_axis, - minor=panel.minor_axis)) - tm.assert_panel_equal(result, expected) - - def test_panel_join_overlap(self): - with catch_warnings(record=True): - panel = tm.makePanel() - tm.add_nans(panel) - - p1 = panel.loc[['ItemA', 'ItemB', 'ItemC']] - p2 = panel.loc[['ItemB', 'ItemC']] - - # Expected index is - # - # ItemA, ItemB_p1, ItemC_p1, ItemB_p2, ItemC_p2 - joined = p1.join(p2, lsuffix='_p1', rsuffix='_p2') - p1_suf = p1.loc[['ItemB', 'ItemC']].add_suffix('_p1') - p2_suf = p2.loc[['ItemB', 'ItemC']].add_suffix('_p2') - no_overlap = panel.loc[['ItemA']] - expected = no_overlap.join(p1_suf.join(p2_suf)) - tm.assert_panel_equal(joined, expected) - - def test_panel_join_many(self): - with catch_warnings(record=True): - tm.K = 10 - panel = tm.makePanel() - tm.K = 4 - - panels = [panel.iloc[:2], panel.iloc[2:6], panel.iloc[6:]] - - joined = panels[0].join(panels[1:]) - tm.assert_panel_equal(joined, panel) - - panels = [panel.iloc[:2, :-5], - panel.iloc[2:6, 2:], - panel.iloc[6:, 5:-7]] - - data_dict = {} - for p in panels: - data_dict.update(p.iteritems()) - - joined = panels[0].join(panels[1:], how='inner') - expected = pd.Panel.from_dict(data_dict, intersect=True) - tm.assert_panel_equal(joined, expected) - - joined = panels[0].join(panels[1:], how='outer') - expected = pd.Panel.from_dict(data_dict, intersect=False) - tm.assert_panel_equal(joined, expected) - - # edge cases - msg = "Suffixes not supported when passing multiple panels" - with pytest.raises(ValueError, match=msg): - panels[0].join(panels[1:], how='outer', lsuffix='foo', - rsuffix='bar') - msg = "Right join not supported with multiple panels" - with pytest.raises(ValueError, match=msg): - panels[0].join(panels[1:], how='right') - def test_join_multi_to_multi(self, join_type): # GH 20475 leftindex = MultiIndex.from_product([list('abc'), list('xy'), [1, 2]], diff --git a/pandas/tests/reshape/test_concat.py b/pandas/tests/reshape/test_concat.py index ec6123bae327e..a186d32ed8800 100644 --- a/pandas/tests/reshape/test_concat.py +++ b/pandas/tests/reshape/test_concat.py @@ -3,7 +3,7 @@ from datetime import datetime from decimal import Decimal from itertools import combinations -from warnings import catch_warnings, simplefilter +from warnings import catch_warnings import dateutil import numpy as np @@ -1499,15 +1499,6 @@ def test_concat_mixed_objs(self): result = concat([s1, df, s2], ignore_index=True) assert_frame_equal(result, expected) - # invalid concatente of mixed dims - with catch_warnings(record=True): - simplefilter("ignore", FutureWarning) - panel = tm.makePanel() - msg = ("cannot concatenate unaligned mixed dimensional NDFrame" - " objects") - with pytest.raises(ValueError, match=msg): - concat([panel, s1], axis=1) - def test_empty_dtype_coerce(self): # xref to #12411 @@ -1543,34 +1534,6 @@ def test_dtype_coerceion(self): result = concat([df.iloc[[0]], df.iloc[[1]]]) tm.assert_series_equal(result.dtypes, df.dtypes) - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") - def test_panel_concat_other_axes(self): - panel = tm.makePanel() - - p1 = panel.iloc[:, :5, :] - p2 = panel.iloc[:, 5:, :] - - result = concat([p1, p2], axis=1) - tm.assert_panel_equal(result, panel) - - p1 = panel.iloc[:, :, :2] - p2 = panel.iloc[:, :, 2:] - - result = concat([p1, p2], axis=2) - tm.assert_panel_equal(result, panel) - - # if things are a bit misbehaved - p1 = panel.iloc[:2, :, :2] - p2 = panel.iloc[:, :, 2:] - p1['ItemC'] = 'baz' - - result = concat([p1, p2], axis=2) - - expected = panel.copy() - expected['ItemC'] = expected['ItemC'].astype('O') - expected.loc['ItemC', :, :2] = 'baz' - tm.assert_panel_equal(result, expected) - @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") # Panel.rename warning we don't care about @pytest.mark.filterwarnings("ignore:Using:FutureWarning") diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py index e29974f56967f..d8046c4944afc 100644 --- a/pandas/tests/series/test_dtypes.py +++ b/pandas/tests/series/test_dtypes.py @@ -291,8 +291,8 @@ def test_astype_categorical_to_other(self): expected = s tm.assert_series_equal(s.astype('category'), expected) tm.assert_series_equal(s.astype(CategoricalDtype()), expected) - msg = (r"could not convert string to float: '(0 - 499|9500 - 9999)'|" - r"invalid literal for float\(\): (0 - 499|9500 - 9999)") + msg = (r"could not convert string to float|" + r"invalid literal for float\(\)") with pytest.raises(ValueError, match=msg): s.astype('float64') diff --git a/pandas/tests/series/test_duplicates.py b/pandas/tests/series/test_duplicates.py index fe47975711a17..a975edacc19c7 100644 --- a/pandas/tests/series/test_duplicates.py +++ b/pandas/tests/series/test_duplicates.py @@ -59,12 +59,18 @@ def test_unique_data_ownership(): Series(Series(["a", "c", "b"]).unique()).sort_values() -def test_is_unique(): - # GH11946 - s = Series(np.random.randint(0, 10, size=1000)) - assert s.is_unique is False - s = Series(np.arange(1000)) - assert s.is_unique is True +@pytest.mark.parametrize('data, expected', [ + (np.random.randint(0, 10, size=1000), False), + (np.arange(1000), True), + ([], True), + ([np.nan], True), + (['foo', 'bar', np.nan], True), + (['foo', 'foo', np.nan], False), + (['foo', 'bar', np.nan, np.nan], False)]) +def test_is_unique(data, expected): + # GH11946 / GH25180 + s = Series(data) + assert s.is_unique is expected def test_is_unique_class_ne(capsys): diff --git a/pandas/tests/test_panel.py b/pandas/tests/test_panel.py index 1fd791fc81009..5d8de3e1f87d5 100644 --- a/pandas/tests/test_panel.py +++ b/pandas/tests/test_panel.py @@ -1858,21 +1858,6 @@ def test_shift(self): assert_series_equal(mixed_panel.dtypes, shifted.dtypes) def test_tshift(self): - # PeriodIndex - ps = tm.makePeriodPanel() - shifted = ps.tshift(1) - unshifted = shifted.tshift(-1) - - assert_panel_equal(unshifted, ps) - - shifted2 = ps.tshift(freq='B') - assert_panel_equal(shifted, shifted2) - - shifted3 = ps.tshift(freq=BDay()) - assert_panel_equal(shifted, shifted3) - - with pytest.raises(ValueError, match='does not match'): - ps.tshift(freq='M') # DatetimeIndex panel = make_test_panel() diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index d36de931e2610..c80b4483c0482 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -257,8 +257,7 @@ def test_categorical_with_nan_consistency(): assert result[1] in expected -@pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") -@pytest.mark.parametrize("obj", [pd.Timestamp("20130101"), tm.makePanel()]) +@pytest.mark.parametrize("obj", [pd.Timestamp("20130101")]) def test_pandas_errors(obj): msg = "Unexpected type for hashing" with pytest.raises(TypeError, match=msg): diff --git a/pandas/util/move.c b/pandas/util/move.c index 62860adb1c1f6..9bb662d50cb3f 100644 --- a/pandas/util/move.c +++ b/pandas/util/move.c @@ -1,3 +1,12 @@ +/* +Copyright (c) 2019, PyData Development Team +All rights reserved. + +Distributed under the terms of the BSD Simplified License. + +The full license is in the LICENSE file, distributed with this software. +*/ + #include #define COMPILING_IN_PY2 (PY_VERSION_HEX <= 0x03000000) @@ -31,15 +40,13 @@ typedef struct { static PyTypeObject stolenbuf_type; /* forward declare type */ static void -stolenbuf_dealloc(stolenbufobject *self) -{ +stolenbuf_dealloc(stolenbufobject *self) { Py_DECREF(self->invalid_bytes); PyObject_Del(self); } static int -stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags) -{ +stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags) { return PyBuffer_FillInfo(view, (PyObject*) self, (void*) PyString_AS_STRING(self->invalid_bytes), @@ -51,8 +58,8 @@ stolenbuf_getbuffer(stolenbufobject *self, Py_buffer *view, int flags) #if COMPILING_IN_PY2 static Py_ssize_t -stolenbuf_getreadwritebuf(stolenbufobject *self, Py_ssize_t segment, void **out) -{ +stolenbuf_getreadwritebuf(stolenbufobject *self, + Py_ssize_t segment, void **out) { if (segment != 0) { PyErr_SetString(PyExc_SystemError, "accessing non-existent string segment"); @@ -63,8 +70,7 @@ stolenbuf_getreadwritebuf(stolenbufobject *self, Py_ssize_t segment, void **out) } static Py_ssize_t -stolenbuf_getsegcount(stolenbufobject *self, Py_ssize_t *len) -{ +stolenbuf_getsegcount(stolenbufobject *self, Py_ssize_t *len) { if (len) { *len = PyString_GET_SIZE(self->invalid_bytes); } @@ -157,8 +163,7 @@ PyDoc_STRVAR( however, if called through *unpacking like ``stolenbuf(*(a,))`` it would only have the one reference (the tuple). */ static PyObject* -move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) -{ +move_into_mutable_buffer(PyObject *self, PyObject *bytes_rvalue) { stolenbufobject *ret; if (!PyString_CheckExact(bytes_rvalue)) { diff --git a/pandas/util/testing.py b/pandas/util/testing.py index 47bde267156ed..053927a77c612 100644 --- a/pandas/util/testing.py +++ b/pandas/util/testing.py @@ -2060,14 +2060,6 @@ def makePanel(nper=None): return Panel.fromDict(data) -def makePeriodPanel(nper=None): - with warnings.catch_warnings(record=True): - warnings.filterwarnings("ignore", "\\nPanel", FutureWarning) - cols = ['Item' + c for c in string.ascii_uppercase[:K - 1]] - data = {c: makePeriodFrame(nper) for c in cols} - return Panel.fromDict(data) - - def makeCustomIndex(nentries, nlevels, prefix='#', names=False, ndupe_l=None, idx_type=None): """Create an index/multindex with given dimensions, levels, names, etc'