From 0a8e606637bfd8767f55972684d993f938781ae9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 7 Feb 2019 01:23:52 +0000 Subject: [PATCH] TST: follow-up to Test nested pandas array #24993 (#25155) * revert changes to tests in gh-24993 * Test nested PandasArray * isort test_numpy.py * change NP_VERSION_INFO * use LooseVersion * add _np_version_under1p16 * remove blank line from merge master * add docstrings to fixtures --- pandas/tests/extension/base/groupby.py | 17 +- pandas/tests/extension/base/methods.py | 6 - pandas/tests/extension/base/missing.py | 8 +- pandas/tests/extension/base/setitem.py | 1 - pandas/tests/extension/conftest.py | 57 +++ pandas/tests/extension/numpy_/__init__.py | 0 pandas/tests/extension/numpy_/conftest.py | 38 -- pandas/tests/extension/numpy_/test_numpy.py | 182 -------- .../extension/numpy_/test_numpy_nested.py | 286 ------------ pandas/tests/extension/test_numpy.py | 430 ++++++++++++++++++ pandas/tests/extension/test_sparse.py | 3 +- 11 files changed, 497 insertions(+), 531 deletions(-) delete mode 100644 pandas/tests/extension/numpy_/__init__.py delete mode 100644 pandas/tests/extension/numpy_/conftest.py delete mode 100644 pandas/tests/extension/numpy_/test_numpy.py delete mode 100644 pandas/tests/extension/numpy_/test_numpy_nested.py create mode 100644 pandas/tests/extension/test_numpy.py diff --git a/pandas/tests/extension/base/groupby.py b/pandas/tests/extension/base/groupby.py index dd406ca0cd5ed8..1929dad075695a 100644 --- a/pandas/tests/extension/base/groupby.py +++ b/pandas/tests/extension/base/groupby.py @@ -55,19 +55,14 @@ def test_groupby_extension_transform(self, data_for_grouping): self.assert_series_equal(result, expected) - @pytest.mark.parametrize('op', [ - lambda x: 1, - lambda x: [1] * len(x), - lambda x: pd.Series([1] * len(x)), - lambda x: x, - ], ids=['scalar', 'list', 'series', 'object']) - def test_groupby_extension_apply(self, data_for_grouping, op): + def test_groupby_extension_apply( + self, 
data_for_grouping, groupby_apply_op): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], "B": data_for_grouping}) - df.groupby("B").apply(op) - df.groupby("B").A.apply(op) - df.groupby("A").apply(op) - df.groupby("A").B.apply(op) + df.groupby("B").apply(groupby_apply_op) + df.groupby("B").A.apply(groupby_apply_op) + df.groupby("A").apply(groupby_apply_op) + df.groupby("A").B.apply(groupby_apply_op) def test_in_numeric_groupby(self, data_for_grouping): df = pd.DataFrame({"A": [1, 1, 2, 2, 3, 3, 1, 4], diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index f64df7a84b7c00..1852edaa9e7485 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -240,7 +240,6 @@ def test_shift_fill_value(self, data): expected = data.take([2, 3, 0, 0]) self.assert_extension_array_equal(result, expected) - @pytest.mark.parametrize("as_frame", [True, False]) def test_hash_pandas_object_works(self, data, as_frame): # https://github.com/pandas-dev/pandas/issues/23066 data = pd.Series(data) @@ -250,7 +249,6 @@ def test_hash_pandas_object_works(self, data, as_frame): b = pd.util.hash_pandas_object(data) self.assert_equal(a, b) - @pytest.mark.parametrize("as_series", [True, False]) def test_searchsorted(self, data_for_sorting, as_series): b, c, a = data_for_sorting arr = type(data_for_sorting)._from_sequence([a, b, c]) @@ -275,7 +273,6 @@ def test_searchsorted(self, data_for_sorting, as_series): sorter = np.array([1, 2, 0]) assert data_for_sorting.searchsorted(a, sorter=sorter) == 0 - @pytest.mark.parametrize("as_frame", [True, False]) def test_where_series(self, data, na_value, as_frame): assert data[0] != data[1] cls = type(data) @@ -309,8 +306,6 @@ def test_where_series(self, data, na_value, as_frame): expected = expected.to_frame(name='a') self.assert_equal(result, expected) - @pytest.mark.parametrize("use_numpy", [True, False]) - @pytest.mark.parametrize("as_series", [True, False]) 
@pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) def test_repeat(self, data, repeats, as_series, use_numpy): arr = type(data)._from_sequence(data[:3], dtype=data.dtype) @@ -327,7 +322,6 @@ def test_repeat(self, data, repeats, as_series, use_numpy): self.assert_equal(result, expected) - @pytest.mark.parametrize("use_numpy", [True, False]) @pytest.mark.parametrize('repeats, kwargs, error, msg', [ (2, dict(axis=1), ValueError, "'axis"), (-1, dict(), ValueError, "negative"), diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 2fe547e50a34bf..834f49f0461f09 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -1,5 +1,4 @@ import numpy as np -import pytest import pandas as pd import pandas.util.testing as tm @@ -89,14 +88,13 @@ def test_fillna_series(self, data_missing): result = ser.fillna(ser) self.assert_series_equal(result, ser) - @pytest.mark.parametrize('method', ['ffill', 'bfill']) - def test_fillna_series_method(self, data_missing, method): + def test_fillna_series_method(self, data_missing, fillna_method): fill_value = data_missing[1] - if method == 'ffill': + if fillna_method == 'ffill': data_missing = data_missing[::-1] - result = pd.Series(data_missing).fillna(method=method) + result = pd.Series(data_missing).fillna(method=fillna_method) expected = pd.Series(data_missing._from_sequence( [fill_value, fill_value], dtype=data_missing.dtype)) diff --git a/pandas/tests/extension/base/setitem.py b/pandas/tests/extension/base/setitem.py index 42fda982f73390..db6328e39e6cc0 100644 --- a/pandas/tests/extension/base/setitem.py +++ b/pandas/tests/extension/base/setitem.py @@ -24,7 +24,6 @@ def test_setitem_sequence(self, data, box_in_series): assert data[0] == original[1] assert data[1] == original[0] - @pytest.mark.parametrize('as_array', [True, False]) def test_setitem_sequence_mismatched_length_raises(self, data, as_array): ser = pd.Series(data) original = 
ser.copy() diff --git a/pandas/tests/extension/conftest.py b/pandas/tests/extension/conftest.py index 5349dd919f2a2d..3cc2d313b09f51 100644 --- a/pandas/tests/extension/conftest.py +++ b/pandas/tests/extension/conftest.py @@ -2,6 +2,8 @@ import pytest +from pandas import Series + @pytest.fixture def dtype(): @@ -108,3 +110,58 @@ def data_for_grouping(): def box_in_series(request): """Whether to box the data in a Series""" return request.param + + +@pytest.fixture(params=[ + lambda x: 1, + lambda x: [1] * len(x), + lambda x: Series([1] * len(x)), + lambda x: x, +], ids=['scalar', 'list', 'series', 'object']) +def groupby_apply_op(request): + """ + Functions to test groupby.apply(). + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_frame(request): + """ + Boolean fixture to support Series and Series.to_frame() comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_series(request): + """ + Boolean fixture to support arr and Series(arr) comparison testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def use_numpy(request): + """ + Boolean fixture to support comparison testing of ExtensionDtype array + and numpy array. + """ + return request.param + + +@pytest.fixture(params=['ffill', 'bfill']) +def fillna_method(request): + """ + Parametrized fixture giving method parameters 'ffill' and 'bfill' for + Series.fillna(method=) testing. + """ + return request.param + + +@pytest.fixture(params=[True, False]) +def as_array(request): + """ + Boolean fixture to support ExtensionDtype _from_sequence method testing. 
+ """ + return request.param diff --git a/pandas/tests/extension/numpy_/__init__.py b/pandas/tests/extension/numpy_/__init__.py deleted file mode 100644 index e69de29bb2d1d6..00000000000000 diff --git a/pandas/tests/extension/numpy_/conftest.py b/pandas/tests/extension/numpy_/conftest.py deleted file mode 100644 index daa93571c2957d..00000000000000 --- a/pandas/tests/extension/numpy_/conftest.py +++ /dev/null @@ -1,38 +0,0 @@ -import numpy as np -import pytest - -from pandas.core.arrays.numpy_ import PandasArray - - -@pytest.fixture -def allow_in_pandas(monkeypatch): - """ - A monkeypatch to tell pandas to let us in. - - By default, passing a PandasArray to an index / series / frame - constructor will unbox that PandasArray to an ndarray, and treat - it as a non-EA column. We don't want people using EAs without - reason. - - The mechanism for this is a check against ABCPandasArray - in each constructor. - - But, for testing, we need to allow them in pandas. So we patch - the _typ of PandasArray, so that we evade the ABCPandasArray - check. - """ - with monkeypatch.context() as m: - m.setattr(PandasArray, '_typ', 'extension') - yield - - -@pytest.fixture -def na_value(): - return np.nan - - -@pytest.fixture -def na_cmp(): - def cmp(a, b): - return np.isnan(a) and np.isnan(b) - return cmp diff --git a/pandas/tests/extension/numpy_/test_numpy.py b/pandas/tests/extension/numpy_/test_numpy.py deleted file mode 100644 index 4c93d5ee0b9d7c..00000000000000 --- a/pandas/tests/extension/numpy_/test_numpy.py +++ /dev/null @@ -1,182 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import compat -from pandas.core.arrays.numpy_ import PandasArray, PandasDtype -import pandas.util.testing as tm - -from .. 
import base - - -@pytest.fixture -def dtype(): - return PandasDtype(np.dtype('float')) - - -@pytest.fixture -def data(allow_in_pandas, dtype): - return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) - - -@pytest.fixture -def data_missing(allow_in_pandas): - return PandasArray(np.array([np.nan, 1.0])) - - -@pytest.fixture -def data_for_sorting(allow_in_pandas): - """Length-3 array with a known sort order. - - This should be three items [B, C, A] with - A < B < C - """ - return PandasArray( - np.array([1, 2, 0]) - ) - - -@pytest.fixture -def data_missing_for_sorting(allow_in_pandas): - """Length-3 array with a known sort order. - - This should be three items [B, NA, A] with - A < B and NA missing. - """ - return PandasArray( - np.array([1, np.nan, 0]) - ) - - -@pytest.fixture -def data_for_grouping(allow_in_pandas): - """Data for factorization, grouping, and unique tests. - - Expected to be like [B, B, NA, NA, A, A, B, C] - - Where A < B < C and NA is missing - """ - a, b, c = np.arange(3) - return PandasArray(np.array( - [b, b, np.nan, np.nan, a, a, b, c] - )) - - -class BaseNumPyTests(object): - pass - - -class TestCasting(BaseNumPyTests, base.BaseCastingTests): - pass - - -class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): - @pytest.mark.skip(reason="We don't register our dtype") - # We don't want to register. This test should probably be split in two. - def test_from_dtype(self, data): - pass - - -class TestDtype(BaseNumPyTests, base.BaseDtypeTests): - - @pytest.mark.skip(reason="Incorrect expected.") - # we unsurprisingly clash with a NumPy name. 
- def test_check_dtype(self, data): - pass - - -class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): - pass - - -class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): - pass - - -class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): - pass - - -class TestMethods(BaseNumPyTests, base.BaseMethodsTests): - - @pytest.mark.skip(reason="TODO: remove?") - def test_value_counts(self, all_data, dropna): - pass - - @pytest.mark.skip(reason="Incorrect expected") - # We have a bool dtype, so the result is an ExtensionArray - # but expected is not - def test_combine_le(self, data_repeated): - super(TestMethods, self).test_combine_le(data_repeated) - - -class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): - divmod_exc = None - series_scalar_exc = None - frame_scalar_exc = None - series_array_exc = None - - def test_divmod_series_array(self, data): - s = pd.Series(data) - self._check_divmod_op(s, divmod, data, exc=None) - - @pytest.mark.skip("We implement ops") - def test_error(self, data, all_arithmetic_operators): - pass - - def test_arith_series_with_scalar(self, data, all_arithmetic_operators): - if (compat.PY2 and - all_arithmetic_operators in {'__div__', '__rdiv__'}): - raise pytest.skip( - "Matching NumPy int / int -> float behavior." - ) - super(TestArithmetics, self).test_arith_series_with_scalar( - data, all_arithmetic_operators - ) - - def test_arith_series_with_array(self, data, all_arithmetic_operators): - if (compat.PY2 and - all_arithmetic_operators in {'__div__', '__rdiv__'}): - raise pytest.skip( - "Matching NumPy int / int -> float behavior." - ) - super(TestArithmetics, self).test_arith_series_with_array( - data, all_arithmetic_operators - ) - - -class TestPrinting(BaseNumPyTests, base.BasePrintingTests): - pass - - -class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): - - def check_reduce(self, s, op_name, skipna): - result = getattr(s, op_name)(skipna=skipna) - # avoid coercing int -> float. 
Just cast to the actual numpy type. - expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) - tm.assert_almost_equal(result, expected) - - -class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): - pass - - -class TestMising(BaseNumPyTests, base.BaseMissingTests): - pass - - -class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): - - @pytest.mark.skip("Incorrect parent test") - # not actually a mixed concat, since we concat int and int. - def test_concat_mixed_dtypes(self, data): - super(TestReshaping, self).test_concat_mixed_dtypes(data) - - -class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): - pass - - -class TestParsing(BaseNumPyTests, base.BaseParsingTests): - pass diff --git a/pandas/tests/extension/numpy_/test_numpy_nested.py b/pandas/tests/extension/numpy_/test_numpy_nested.py deleted file mode 100644 index cf9b34dd08798c..00000000000000 --- a/pandas/tests/extension/numpy_/test_numpy_nested.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Tests for PandasArray with nested data. Users typically won't create -these objects via `pd.array`, but they can show up through `.array` -on a Series with nested data. - -We partition these tests into their own file, as many of the base -tests fail, as they aren't appropriate for nested data. It is easier -to have a seperate file with its own data generating fixtures, than -trying to skip based upon the value of a fixture. -""" -import pytest - -import pandas as pd -from pandas.core.arrays.numpy_ import PandasArray, PandasDtype - -from .. import base - -# For NumPy <1.16, np.array([np.nan, (1,)]) raises -# ValueError: setting an array element with a sequence. 
-np = pytest.importorskip('numpy', minversion='1.16.0') - - -@pytest.fixture -def dtype(): - return PandasDtype(np.dtype('object')) - - -@pytest.fixture -def data(allow_in_pandas, dtype): - return pd.Series([(i,) for i in range(100)]).array - - -@pytest.fixture -def data_missing(allow_in_pandas): - return PandasArray(np.array([np.nan, (1,)])) - - -@pytest.fixture -def data_for_sorting(allow_in_pandas): - """Length-3 array with a known sort order. - - This should be three items [B, C, A] with - A < B < C - """ - # Use an empty tuple for first element, then remove, - # to disable np.array's shape inference. - return PandasArray( - np.array([(), (2,), (3,), (1,)])[1:] - ) - - -@pytest.fixture -def data_missing_for_sorting(allow_in_pandas): - """Length-3 array with a known sort order. - - This should be three items [B, NA, A] with - A < B and NA missing. - """ - return PandasArray( - np.array([(1,), np.nan, (0,)]) - ) - - -@pytest.fixture -def data_for_grouping(allow_in_pandas): - """Data for factorization, grouping, and unique tests. - - Expected to be like [B, B, NA, NA, A, A, B, C] - - Where A < B < C and NA is missing - """ - a, b, c = (1,), (2,), (3,) - return PandasArray(np.array( - [b, b, np.nan, np.nan, a, a, b, c] - )) - - -skip_nested = pytest.mark.skip(reason="Skipping for nested PandasArray") - - -class BaseNumPyTests(object): - pass - - -class TestCasting(BaseNumPyTests, base.BaseCastingTests): - - @skip_nested - def test_astype_str(self, data): - pass - - -class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): - @pytest.mark.skip(reason="We don't register our dtype") - # We don't want to register. This test should probably be split in two. - def test_from_dtype(self, data): - pass - - @skip_nested - def test_array_from_scalars(self, data): - pass - - -class TestDtype(BaseNumPyTests, base.BaseDtypeTests): - - @pytest.mark.skip(reason="Incorrect expected.") - # we unsurprisingly clash with a NumPy name. 
- def test_check_dtype(self, data): - pass - - -class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): - - @skip_nested - def test_getitem_scalar(self, data): - pass - - @skip_nested - def test_take_series(self, data): - pass - - -class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): - @skip_nested - def test_groupby_extension_apply(self, data_for_grouping, op): - pass - - -class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): - @skip_nested - def test_array_interface(self, data): - # NumPy array shape inference - pass - - -class TestMethods(BaseNumPyTests, base.BaseMethodsTests): - - @pytest.mark.skip(reason="TODO: remove?") - def test_value_counts(self, all_data, dropna): - pass - - @pytest.mark.skip(reason="Incorrect expected") - # We have a bool dtype, so the result is an ExtensionArray - # but expected is not - def test_combine_le(self, data_repeated): - super(TestMethods, self).test_combine_le(data_repeated) - - @skip_nested - def test_combine_add(self, data_repeated): - # Not numeric - pass - - @skip_nested - def test_shift_fill_value(self, data): - # np.array shape inference. Shift implementation fails. - super().test_shift_fill_value(data) - - @skip_nested - def test_unique(self, data, box, method): - # Fails creating expected - pass - - @skip_nested - def test_fillna_copy_frame(self, data_missing): - # The "scalar" for this array isn't a scalar. - pass - - @skip_nested - def test_fillna_copy_series(self, data_missing): - # The "scalar" for this array isn't a scalar. - pass - - @skip_nested - def test_hash_pandas_object_works(self, data, as_frame): - # ndarray of tuples not hashable - pass - - @skip_nested - def test_searchsorted(self, data_for_sorting, as_series): - # Test setup fails. - pass - - @skip_nested - def test_where_series(self, data, na_value, as_frame): - # Test setup fails. 
- pass - - @skip_nested - def test_repeat(self, data, repeats, as_series, use_numpy): - # Fails creating expected - pass - - -class TestPrinting(BaseNumPyTests, base.BasePrintingTests): - pass - - -class TestMissing(BaseNumPyTests, base.BaseMissingTests): - - @skip_nested - def test_fillna_scalar(self, data_missing): - # Non-scalar "scalar" values. - pass - - @skip_nested - def test_fillna_series_method(self, data_missing, method): - # Non-scalar "scalar" values. - pass - - @skip_nested - def test_fillna_series(self, data_missing): - # Non-scalar "scalar" values. - pass - - @skip_nested - def test_fillna_frame(self, data_missing): - # Non-scalar "scalar" values. - pass - - -class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): - - @pytest.mark.skip("Incorrect parent test") - # not actually a mixed concat, since we concat int and int. - def test_concat_mixed_dtypes(self, data): - super(TestReshaping, self).test_concat_mixed_dtypes(data) - - @skip_nested - def test_merge(self, data, na_value): - # Fails creating expected - pass - - @skip_nested - def test_merge_on_extension_array(self, data): - # Fails creating expected - pass - - @skip_nested - def test_merge_on_extension_array_duplicates(self, data): - # Fails creating expected - pass - - -class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): - - @skip_nested - def test_setitem_scalar_series(self, data, box_in_series): - pass - - @skip_nested - def test_setitem_sequence(self, data, box_in_series): - pass - - @skip_nested - def test_setitem_sequence_mismatched_length_raises(self, data, as_array): - pass - - @skip_nested - def test_setitem_sequence_broadcasts(self, data, box_in_series): - pass - - @skip_nested - def test_setitem_loc_scalar_mixed(self, data): - pass - - @skip_nested - def test_setitem_loc_scalar_multiple_homogoneous(self, data): - pass - - @skip_nested - def test_setitem_iloc_scalar_mixed(self, data): - pass - - @skip_nested - def test_setitem_iloc_scalar_multiple_homogoneous(self, data): 
- pass - - @skip_nested - def test_setitem_mask_broadcast(self, data, setter): - pass - - @skip_nested - def test_setitem_scalar_key_sequence_raise(self, data): - pass - - -# Skip Arithmetics, NumericReduce, BooleanReduce, Parsing diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py new file mode 100644 index 00000000000000..41f5beb8c885d6 --- /dev/null +++ b/pandas/tests/extension/test_numpy.py @@ -0,0 +1,430 @@ +import numpy as np +import pytest + +from pandas.compat.numpy import _np_version_under1p16 + +import pandas as pd +from pandas import compat +from pandas.core.arrays.numpy_ import PandasArray, PandasDtype +import pandas.util.testing as tm + +from . import base + + +@pytest.fixture(params=['float', 'object']) +def dtype(request): + return PandasDtype(np.dtype(request.param)) + + +@pytest.fixture +def allow_in_pandas(monkeypatch): + """ + A monkeypatch to tell pandas to let us in. + + By default, passing a PandasArray to an index / series / frame + constructor will unbox that PandasArray to an ndarray, and treat + it as a non-EA column. We don't want people using EAs without + reason. + + The mechanism for this is a check against ABCPandasArray + in each constructor. + + But, for testing, we need to allow them in pandas. So we patch + the _typ of PandasArray, so that we evade the ABCPandasArray + check. + """ + with monkeypatch.context() as m: + m.setattr(PandasArray, '_typ', 'extension') + yield + + +@pytest.fixture +def data(allow_in_pandas, dtype): + if dtype.numpy_dtype == 'object': + return pd.Series([(i,) for i in range(100)]).array + return PandasArray(np.arange(1, 101, dtype=dtype._dtype)) + + +@pytest.fixture +def data_missing(allow_in_pandas, dtype): + # For NumPy <1.16, np.array([np.nan, (1,)]) raises + # ValueError: setting an array element with a sequence. 
+ if dtype.numpy_dtype == 'object': + if _np_version_under1p16: + raise pytest.skip("Skipping for NumPy <1.16") + return PandasArray(np.array([np.nan, (1,)])) + return PandasArray(np.array([np.nan, 1.0])) + + +@pytest.fixture +def na_value(): + return np.nan + + +@pytest.fixture +def na_cmp(): + def cmp(a, b): + return np.isnan(a) and np.isnan(b) + return cmp + + +@pytest.fixture +def data_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, C, A] with + A < B < C + """ + if dtype.numpy_dtype == 'object': + # Use an empty tuple for first element, then remove, + # to disable np.array's shape inference. + return PandasArray( + np.array([(), (2,), (3,), (1,)])[1:] + ) + return PandasArray( + np.array([1, 2, 0]) + ) + + +@pytest.fixture +def data_missing_for_sorting(allow_in_pandas, dtype): + """Length-3 array with a known sort order. + + This should be three items [B, NA, A] with + A < B and NA missing. + """ + if dtype.numpy_dtype == 'object': + return PandasArray( + np.array([(1,), np.nan, (0,)]) + ) + return PandasArray( + np.array([1, np.nan, 0]) + ) + + +@pytest.fixture +def data_for_grouping(allow_in_pandas, dtype): + """Data for factorization, grouping, and unique tests. + + Expected to be like [B, B, NA, NA, A, A, B, C] + + Where A < B < C and NA is missing + """ + if dtype.numpy_dtype == 'object': + a, b, c = (1,), (2,), (3,) + else: + a, b, c = np.arange(3) + return PandasArray(np.array( + [b, b, np.nan, np.nan, a, a, b, c] + )) + + +@pytest.fixture +def skip_numpy_object(dtype): + """ + Tests for PandasArray with nested data. Users typically won't create + these objects via `pd.array`, but they can show up through `.array` + on a Series with nested data. Many of the base tests fail, as they aren't + appropriate for nested data. + + This fixture allows these tests to be skipped when used as a usefixtures + marker to either an individual test or a test class. 
+ """ + if dtype == 'object': + raise pytest.skip("Skipping for object dtype.") + + +skip_nested = pytest.mark.usefixtures('skip_numpy_object') + + +class BaseNumPyTests(object): + pass + + +class TestCasting(BaseNumPyTests, base.BaseCastingTests): + + @skip_nested + def test_astype_str(self, data): + # ValueError: setting an array element with a sequence + super(TestCasting, self).test_astype_str(data) + + +class TestConstructors(BaseNumPyTests, base.BaseConstructorsTests): + @pytest.mark.skip(reason="We don't register our dtype") + # We don't want to register. This test should probably be split in two. + def test_from_dtype(self, data): + pass + + @skip_nested + def test_array_from_scalars(self, data): + # ValueError: PandasArray must be 1-dimensional. + super(TestConstructors, self).test_array_from_scalars(data) + + +class TestDtype(BaseNumPyTests, base.BaseDtypeTests): + + @pytest.mark.skip(reason="Incorrect expected.") + # we unsurprisingly clash with a NumPy name. + def test_check_dtype(self, data): + pass + + +class TestGetitem(BaseNumPyTests, base.BaseGetitemTests): + + @skip_nested + def test_getitem_scalar(self, data): + # AssertionError + super(TestGetitem, self).test_getitem_scalar(data) + + @skip_nested + def test_take_series(self, data): + # ValueError: PandasArray must be 1-dimensional. 
+ super(TestGetitem, self).test_take_series(data) + + +class TestGroupby(BaseNumPyTests, base.BaseGroupbyTests): + @skip_nested + def test_groupby_extension_apply( + self, data_for_grouping, groupby_apply_op): + # ValueError: Names should be list-like for a MultiIndex + super(TestGroupby, self).test_groupby_extension_apply( + data_for_grouping, groupby_apply_op) + + +class TestInterface(BaseNumPyTests, base.BaseInterfaceTests): + @skip_nested + def test_array_interface(self, data): + # NumPy array shape inference + super(TestInterface, self).test_array_interface(data) + + +class TestMethods(BaseNumPyTests, base.BaseMethodsTests): + + @pytest.mark.skip(reason="TODO: remove?") + def test_value_counts(self, all_data, dropna): + pass + + @pytest.mark.skip(reason="Incorrect expected") + # We have a bool dtype, so the result is an ExtensionArray + # but expected is not + def test_combine_le(self, data_repeated): + super(TestMethods, self).test_combine_le(data_repeated) + + @skip_nested + def test_combine_add(self, data_repeated): + # Not numeric + super(TestMethods, self).test_combine_add(data_repeated) + + @skip_nested + def test_shift_fill_value(self, data): + # np.array shape inference. Shift implementation fails. + super(TestMethods, self).test_shift_fill_value(data) + + @skip_nested + @pytest.mark.parametrize('box', [pd.Series, lambda x: x]) + @pytest.mark.parametrize('method', [lambda x: x.unique(), pd.unique]) + def test_unique(self, data, box, method): + # Fails creating expected + super(TestMethods, self).test_unique(data, box, method) + + @skip_nested + def test_fillna_copy_frame(self, data_missing): + # The "scalar" for this array isn't a scalar. + super(TestMethods, self).test_fillna_copy_frame(data_missing) + + @skip_nested + def test_fillna_copy_series(self, data_missing): + # The "scalar" for this array isn't a scalar. 
+ super(TestMethods, self).test_fillna_copy_series(data_missing) + + @skip_nested + def test_hash_pandas_object_works(self, data, as_frame): + # ndarray of tuples not hashable + super(TestMethods, self).test_hash_pandas_object_works(data, as_frame) + + @skip_nested + def test_searchsorted(self, data_for_sorting, as_series): + # Test setup fails. + super(TestMethods, self).test_searchsorted(data_for_sorting, as_series) + + @skip_nested + def test_where_series(self, data, na_value, as_frame): + # Test setup fails. + super(TestMethods, self).test_where_series(data, na_value, as_frame) + + @skip_nested + @pytest.mark.parametrize("repeats", [0, 1, 2, [1, 2, 3]]) + def test_repeat(self, data, repeats, as_series, use_numpy): + # Fails creating expected + super(TestMethods, self).test_repeat( + data, repeats, as_series, use_numpy) + + +@skip_nested +class TestArithmetics(BaseNumPyTests, base.BaseArithmeticOpsTests): + divmod_exc = None + series_scalar_exc = None + frame_scalar_exc = None + series_array_exc = None + + def test_divmod_series_array(self, data): + s = pd.Series(data) + self._check_divmod_op(s, divmod, data, exc=None) + + @pytest.mark.skip("We implement ops") + def test_error(self, data, all_arithmetic_operators): + pass + + def test_arith_series_with_scalar(self, data, all_arithmetic_operators): + if (compat.PY2 and + all_arithmetic_operators in {'__div__', '__rdiv__'}): + raise pytest.skip( + "Matching NumPy int / int -> float behavior." + ) + super(TestArithmetics, self).test_arith_series_with_scalar( + data, all_arithmetic_operators + ) + + def test_arith_series_with_array(self, data, all_arithmetic_operators): + if (compat.PY2 and + all_arithmetic_operators in {'__div__', '__rdiv__'}): + raise pytest.skip( + "Matching NumPy int / int -> float behavior." 
+ ) + super(TestArithmetics, self).test_arith_series_with_array( + data, all_arithmetic_operators + ) + + +class TestPrinting(BaseNumPyTests, base.BasePrintingTests): + pass + + +@skip_nested +class TestNumericReduce(BaseNumPyTests, base.BaseNumericReduceTests): + + def check_reduce(self, s, op_name, skipna): + result = getattr(s, op_name)(skipna=skipna) + # avoid coercing int -> float. Just cast to the actual numpy type. + expected = getattr(s.astype(s.dtype._dtype), op_name)(skipna=skipna) + tm.assert_almost_equal(result, expected) + + +@skip_nested +class TestBooleanReduce(BaseNumPyTests, base.BaseBooleanReduceTests): + pass + + +class TestMissing(BaseNumPyTests, base.BaseMissingTests): + + @skip_nested + def test_fillna_scalar(self, data_missing): + # Non-scalar "scalar" values. + super(TestMissing, self).test_fillna_scalar(data_missing) + + @skip_nested + def test_fillna_series_method(self, data_missing, fillna_method): + # Non-scalar "scalar" values. + super(TestMissing, self).test_fillna_series_method( + data_missing, fillna_method) + + @skip_nested + def test_fillna_series(self, data_missing): + # Non-scalar "scalar" values. + super(TestMissing, self).test_fillna_series(data_missing) + + @skip_nested + def test_fillna_frame(self, data_missing): + # Non-scalar "scalar" values. + super(TestMissing, self).test_fillna_frame(data_missing) + + +class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): + + @pytest.mark.skip("Incorrect parent test") + # not actually a mixed concat, since we concat int and int. 
+ def test_concat_mixed_dtypes(self, data): + super(TestReshaping, self).test_concat_mixed_dtypes(data) + + @skip_nested + def test_merge(self, data, na_value): + # Fails creating expected + super(TestReshaping, self).test_merge(data, na_value) + + @skip_nested + def test_merge_on_extension_array(self, data): + # Fails creating expected + super(TestReshaping, self).test_merge_on_extension_array(data) + + @skip_nested + def test_merge_on_extension_array_duplicates(self, data): + # Fails creating expected + super(TestReshaping, self).test_merge_on_extension_array_duplicates( + data) + + +class TestSetitem(BaseNumPyTests, base.BaseSetitemTests): + + @skip_nested + def test_setitem_scalar_series(self, data, box_in_series): + # AssertionError + super(TestSetitem, self).test_setitem_scalar_series( + data, box_in_series) + + @skip_nested + def test_setitem_sequence(self, data, box_in_series): + # ValueError: shape mismatch: value array of shape (2,1) could not + # be broadcast to indexing result of shape (2,) + super(TestSetitem, self).test_setitem_sequence(data, box_in_series) + + @skip_nested + def test_setitem_sequence_mismatched_length_raises(self, data, as_array): + # ValueError: PandasArray must be 1-dimensional. + (super(TestSetitem, self). 
+ test_setitem_sequence_mismatched_length_raises(data, as_array)) + + @skip_nested + def test_setitem_sequence_broadcasts(self, data, box_in_series): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super(TestSetitem, self).test_setitem_sequence_broadcasts( + data, box_in_series) + + @skip_nested + def test_setitem_loc_scalar_mixed(self, data): + # AssertionError + super(TestSetitem, self).test_setitem_loc_scalar_mixed(data) + + @skip_nested + def test_setitem_loc_scalar_multiple_homogoneous(self, data): + # AssertionError + super(TestSetitem, self).test_setitem_loc_scalar_multiple_homogoneous( + data) + + @skip_nested + def test_setitem_iloc_scalar_mixed(self, data): + # AssertionError + super(TestSetitem, self).test_setitem_iloc_scalar_mixed(data) + + @skip_nested + def test_setitem_iloc_scalar_multiple_homogoneous(self, data): + # AssertionError + super(TestSetitem, self).test_setitem_iloc_scalar_multiple_homogoneous( + data) + + @skip_nested + @pytest.mark.parametrize('setter', ['loc', None]) + def test_setitem_mask_broadcast(self, data, setter): + # ValueError: cannot set using a list-like indexer with a different + # length than the value + super(TestSetitem, self).test_setitem_mask_broadcast(data, setter) + + @skip_nested + def test_setitem_scalar_key_sequence_raise(self, data): + # Failed: DID NOT RAISE + super(TestSetitem, self).test_setitem_scalar_key_sequence_raise(data) + + +@skip_nested +class TestParsing(BaseNumPyTests, base.BaseParsingTests): + pass diff --git a/pandas/tests/extension/test_sparse.py b/pandas/tests/extension/test_sparse.py index 21dbf9524961cd..146dea2b65d83d 100644 --- a/pandas/tests/extension/test_sparse.py +++ b/pandas/tests/extension/test_sparse.py @@ -287,11 +287,10 @@ def test_combine_first(self, data): pytest.skip("TODO(SparseArray.__setitem__ will preserve dtype.") super(TestMethods, self).test_combine_first(data) - @pytest.mark.parametrize("as_series", [True, False]) def 
test_searchsorted(self, data_for_sorting, as_series): with tm.assert_produces_warning(PerformanceWarning): super(TestMethods, self).test_searchsorted(data_for_sorting, - as_series=as_series) + as_series) class TestCasting(BaseSparseTests, base.BaseCastingTests):