diff --git a/pandas/tests/series/indexing/test_boolean.py b/pandas/tests/series/indexing/test_boolean.py index 16a29d10eb414..28f3c0f7429f8 100644 --- a/pandas/tests/series/indexing/test_boolean.py +++ b/pandas/tests/series/indexing/test_boolean.py @@ -1,10 +1,7 @@ import numpy as np import pytest -from pandas.core.dtypes.common import is_integer - -import pandas as pd -from pandas import Index, Series, Timestamp, date_range, isna +from pandas import Index, Series import pandas._testing as tm from pandas.core.indexing import IndexingError @@ -136,492 +133,3 @@ def test_get_set_boolean_different_order(string_series): sel = string_series[ordered > 0] exp = string_series[string_series > 0] tm.assert_series_equal(sel, exp) - - -def test_where_unsafe_int(sint_dtype): - s = Series(np.arange(10), dtype=sint_dtype) - mask = s < 5 - - s[mask] = range(2, 7) - expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype) - - tm.assert_series_equal(s, expected) - - -def test_where_unsafe_float(float_dtype): - s = Series(np.arange(10), dtype=float_dtype) - mask = s < 5 - - s[mask] = range(2, 7) - data = list(range(2, 7)) + list(range(5, 10)) - expected = Series(data, dtype=float_dtype) - - tm.assert_series_equal(s, expected) - - -@pytest.mark.parametrize( - "dtype,expected_dtype", - [ - (np.int8, np.float64), - (np.int16, np.float64), - (np.int32, np.float64), - (np.int64, np.float64), - (np.float32, np.float32), - (np.float64, np.float64), - ], -) -def test_where_unsafe_upcast(dtype, expected_dtype): - # see gh-9743 - s = Series(np.arange(10), dtype=dtype) - values = [2.5, 3.5, 4.5, 5.5, 6.5] - mask = s < 5 - expected = Series(values + list(range(5, 10)), dtype=expected_dtype) - s[mask] = values - tm.assert_series_equal(s, expected) - - -def test_where_unsafe(): - # see gh-9731 - s = Series(np.arange(10), dtype="int64") - values = [2.5, 3.5, 4.5, 5.5] - - mask = s > 5 - expected = Series(list(range(6)) + values, dtype="float64") - - s[mask] = values - tm.assert_series_equal(s, expected) - - # see gh-3235 - s = Series(np.arange(10), dtype="int64") - mask = s < 5 - s[mask] = range(2, 7) - expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64") - tm.assert_series_equal(s, expected) - assert s.dtype == expected.dtype - - s = Series(np.arange(10), dtype="int64") - mask = s > 5 - s[mask] = [0] * 4 - expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64") - tm.assert_series_equal(s, expected) - - s = Series(np.arange(10)) - mask = s > 5 - - msg = "cannot assign mismatch length to masked array" - with pytest.raises(ValueError, match=msg): - s[mask] = [5, 4, 3, 2, 1] - - with pytest.raises(ValueError, match=msg): - s[mask] = [0] * 5 - - # dtype changes - s = Series([1, 2, 3, 4]) - result = s.where(s > 2, np.nan) - expected = Series([np.nan, np.nan, 3, 4]) - tm.assert_series_equal(result, expected) - - # GH 4667 - # setting with None changes dtype - s = Series(range(10)).astype(float) - s[8] = None - result = s[8] - assert isna(result) - - s = Series(range(10)).astype(float) - s[s > 8] = None - result = s[isna(s)] - expected = Series(np.nan, index=[9]) - tm.assert_series_equal(result, expected) - - -def test_where(): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.where(cond).dropna() - rs2 = s[cond] - tm.assert_series_equal(rs, rs2) - - rs = s.where(cond, -s) - tm.assert_series_equal(rs, s.abs()) - - rs = s.where(cond) - assert s.shape == rs.shape - assert rs is not s - - # test alignment - cond = Series([True, False, False, True, False], index=s.index) - s2 = -(s.abs()) - - expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) - rs = s2.where(cond[:3]) - tm.assert_series_equal(rs, expected) - - expected = s2.abs() - expected.iloc[0] = s2[0] - rs = s2.where(cond[:3], -s2) - tm.assert_series_equal(rs, expected) - - -def test_where_error(): - s = Series(np.random.randn(5)) - cond = s > 0 - - msg = "Array conditional must be same shape as self" - with pytest.raises(ValueError, match=msg): - s.where(1) - with pytest.raises(ValueError, match=msg): - s.where(cond[:3].values, -s) - - # GH 2745 - s = Series([1, 2]) - s[[True, False]] = [0, 1] - expected = Series([0, 2]) - tm.assert_series_equal(s, expected) - - # failures - msg = "cannot assign mismatch length to masked array" - with pytest.raises(ValueError, match=msg): - s[[True, False]] = [0, 2, 3] - msg = ( - "NumPy boolean array indexing assignment cannot assign 0 input " - "values to the 1 output values where the mask is true" - ) - with pytest.raises(ValueError, match=msg): - s[[True, False]] = [] - - -@pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) -def test_where_array_like(klass): - # see gh-15414 - s = Series([1, 2, 3]) - cond = [False, True, True] - expected = Series([np.nan, 2, 3]) - - result = s.where(klass(cond)) - tm.assert_series_equal(result, expected) - - -@pytest.mark.parametrize( - "cond", - [ - [1, 0, 1], - Series([2, 5, 7]), - ["True", "False", "True"], - [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")], - ], -) -def test_where_invalid_input(cond): - # see gh-15414: only boolean arrays accepted - s = Series([1, 2, 3]) - msg = "Boolean array expected for the condition" - - with pytest.raises(ValueError, match=msg): - s.where(cond) - - msg = "Array conditional must be same shape as self" - with pytest.raises(ValueError, match=msg): - s.where([True]) - - -def test_where_ndframe_align(): - msg = "Array conditional must be same shape as self" - s = Series([1, 2, 3]) - - cond = [True] - with pytest.raises(ValueError, match=msg): - s.where(cond) - - expected = Series([1, np.nan, np.nan]) - - out = s.where(Series(cond)) - tm.assert_series_equal(out, expected) - - cond = np.array([False, True, False, True]) - with pytest.raises(ValueError, match=msg): - s.where(cond) - - expected = Series([np.nan, 2, np.nan]) - - out = s.where(Series(cond)) - tm.assert_series_equal(out, expected) - - -def test_where_setitem_invalid(): - # GH 2702 - # make sure correct exceptions are raised on invalid list assignment - - msg = "cannot set using a {} indexer with a different length than the value" - - # slice - s = Series(list("abc")) - - with pytest.raises(ValueError, match=msg.format("slice")): - s[0:3] = list(range(27)) - - s[0:3] = list(range(3)) - expected = Series([0, 1, 2]) - tm.assert_series_equal(s.astype(np.int64), expected) - - # slice with step - s = Series(list("abcdef")) - - with pytest.raises(ValueError, match=msg.format("slice")): - s[0:4:2] = list(range(27)) - - s = Series(list("abcdef")) - s[0:4:2] = list(range(2)) - expected = Series([0, "b", 1, "d", "e", "f"]) - tm.assert_series_equal(s, expected) - - # neg slices - s = Series(list("abcdef")) - - with pytest.raises(ValueError, match=msg.format("slice")): - s[:-1] = list(range(27)) - - s[-3:-1] = list(range(2)) - expected = Series(["a", "b", "c", 0, 1, "f"]) - tm.assert_series_equal(s, expected) - - # list - s = Series(list("abc")) - - with pytest.raises(ValueError, match=msg.format("list-like")): - s[[0, 1, 2]] = list(range(27)) - - s = Series(list("abc")) - - with pytest.raises(ValueError, match=msg.format("list-like")): - s[[0, 1, 2]] = list(range(2)) - - # scalar - s = Series(list("abc")) - s[0] = list(range(10)) - expected = Series([list(range(10)), "b", "c"]) - tm.assert_series_equal(s, expected) - - -@pytest.mark.parametrize("size", range(2, 6)) -@pytest.mark.parametrize( - "mask", [[True, False, False, False, False], [True, False], [False]] -) -@pytest.mark.parametrize( - "item", [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min] -) -# Test numpy arrays, lists and tuples as the input to be -# broadcast -@pytest.mark.parametrize( - "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] -) -def test_broadcast(size, mask, item, box): - selection = np.resize(mask, size) - - data = np.arange(size, dtype=float) - - # Construct the expected series by taking the source - # data or item based on the selection - expected = Series( - [item if use_item else data[i] for i, use_item in enumerate(selection)] - ) - - s = Series(data) - s[selection] = box(item) - tm.assert_series_equal(s, expected) - - s = Series(data) - result = s.where(~selection, box(item)) - tm.assert_series_equal(result, expected) - - s = Series(data) - result = s.mask(selection, box(item)) - tm.assert_series_equal(result, expected) - - -def test_where_inplace(): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.copy() - - rs.where(cond, inplace=True) - tm.assert_series_equal(rs.dropna(), s[cond]) - tm.assert_series_equal(rs, s.where(cond)) - - rs = s.copy() - rs.where(cond, -s, inplace=True) - tm.assert_series_equal(rs, s.where(cond, -s)) - - -def test_where_dups(): - # GH 4550 - # where crashes with dups in index - s1 = Series(list(range(3))) - s2 = Series(list(range(3))) - comb = pd.concat([s1, s2]) - result = comb.where(comb < 2) - expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(result, expected) - - # GH 4548 - # inplace updating not working with dups - comb[comb < 1] = 5 - expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(comb, expected) - - comb[comb < 2] += 10 - expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) - tm.assert_series_equal(comb, expected) - - -def test_where_numeric_with_string(): - # GH 9280 - s = pd.Series([1, 2, 3]) - w = s.where(s > 1, "X") - - assert not is_integer(w[0]) - assert is_integer(w[1]) - assert is_integer(w[2]) - assert isinstance(w[0], str) - assert w.dtype == "object" - - w = s.where(s > 1, ["X", "Y", "Z"]) - assert not is_integer(w[0]) - assert is_integer(w[1]) - assert is_integer(w[2]) - assert isinstance(w[0], str) - assert w.dtype == "object" - - w = s.where(s > 1, np.array(["X", "Y", "Z"])) - assert not is_integer(w[0]) - assert is_integer(w[1]) - assert is_integer(w[2]) - assert isinstance(w[0], str) - assert w.dtype == "object" - - -def test_where_timedelta_coerce(): - s = Series([1, 2], dtype="timedelta64[ns]") - expected = Series([10, 10]) - mask = np.array([False, False]) - - rs = s.where(mask, [10, 10]) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, 10) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, 10.0) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, 10.0]) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype="object") - tm.assert_series_equal(rs, expected) - - -def test_where_datetime_conversion(): - s = Series(date_range("20130102", periods=2)) - expected = Series([10, 10]) - mask = np.array([False, False]) - - rs = s.where(mask, [10, 10]) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, 10) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, 10.0) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, 10.0]) - tm.assert_series_equal(rs, expected) - - rs = s.where(mask, [10.0, np.nan]) - expected = Series([10, None], dtype="object") - tm.assert_series_equal(rs, expected) - - # GH 15701 - timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"] - s = Series([pd.Timestamp(t) for t in timestamps]) - rs = s.where(Series([False, True])) - expected = Series([pd.NaT, s[1]]) - tm.assert_series_equal(rs, expected) - - -def test_where_dt_tz_values(tz_naive_fixture): - ser1 = pd.Series( - pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture) - ) - ser2 = pd.Series( - pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture) - ) - mask = pd.Series([True, True, False]) - result = ser1.where(mask, ser2) - exp = pd.Series( - pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture) - ) - tm.assert_series_equal(exp, result) - - -def test_mask(): - # compare with tested results in test_where - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.where(~cond, np.nan) - tm.assert_series_equal(rs, s.mask(cond)) - - rs = s.where(~cond) - rs2 = s.mask(cond) - tm.assert_series_equal(rs, rs2) - - rs = s.where(~cond, -s) - rs2 = s.mask(cond, -s) - tm.assert_series_equal(rs, rs2) - - cond = Series([True, False, False, True, False], index=s.index) - s2 = -(s.abs()) - rs = s2.where(~cond[:3]) - rs2 = s2.mask(cond[:3]) - tm.assert_series_equal(rs, rs2) - - rs = s2.where(~cond[:3], -s2) - rs2 = s2.mask(cond[:3], -s2) - tm.assert_series_equal(rs, rs2) - - msg = "Array conditional must be same shape as self" - with pytest.raises(ValueError, match=msg): - s.mask(1) - with pytest.raises(ValueError, match=msg): - s.mask(cond[:3].values, -s) - - # dtype changes - s = Series([1, 2, 3, 4]) - result = s.mask(s > 2, np.nan) - expected = Series([1, 2, np.nan, np.nan]) - tm.assert_series_equal(result, expected) - - # see gh-21891 - s = Series([1, 2]) - res = s.mask([True, False]) - - exp = Series([np.nan, 2]) - tm.assert_series_equal(res, exp) - - -def test_mask_inplace(): - s = Series(np.random.randn(5)) - cond = s > 0 - - rs = s.copy() - rs.mask(cond, inplace=True) - tm.assert_series_equal(rs.dropna(), s[~cond]) - tm.assert_series_equal(rs, s.mask(cond)) - - rs = s.copy() - rs.mask(cond, -s, inplace=True) - tm.assert_series_equal(rs, s.mask(cond, -s)) diff --git a/pandas/tests/series/indexing/test_datetime.py b/pandas/tests/series/indexing/test_datetime.py index 813c195b36f67..acaa9de88a836 100644 --- a/pandas/tests/series/indexing/test_datetime.py +++ b/pandas/tests/series/indexing/test_datetime.py @@ -148,7 +148,6 @@ def test_frame_datetime64_duplicated(): def test_getitem_setitem_datetime_tz_pytz(): from pytz import timezone as tz - from pandas import date_range N = 50 # testing with timezone, GH #2785 @@ -189,8 +188,6 @@ def test_getitem_setitem_datetime_tz_dateutil(): lambda x: tzutc() if x == "UTC" else gettz(x) ) # handle special case for utc in dateutil - from pandas import date_range - N = 50 # testing with timezone, GH #2785 @@ -373,7 +370,6 @@ def test_getitem_median_slice_bug(): def test_datetime_indexing(): - from pandas import date_range index = date_range("1/1/2000", "1/7/2000") index = index.repeat(3) diff --git a/pandas/tests/series/indexing/test_get.py b/pandas/tests/series/indexing/test_get.py new file mode 100644 index 0000000000000..438b61ed203a3 --- /dev/null +++ b/pandas/tests/series/indexing/test_get.py @@ -0,0 +1,134 @@ +import numpy as np + +import pandas as pd +from pandas import Series + + +def test_get(): + # GH 6383 + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ) + ) + + result = s.get(25, 0) + expected = 0 + assert result == expected + + s = Series( + np.array( + [ + 43, + 48, + 60, + 48, + 50, + 51, + 50, + 45, + 57, + 48, + 56, + 45, + 51, + 39, + 55, + 43, + 54, + 52, + 51, + 54, + ] + ), + index=pd.Float64Index( + [ + 25.0, + 36.0, + 49.0, + 64.0, + 81.0, + 100.0, + 121.0, + 144.0, + 169.0, + 196.0, + 1225.0, + 1296.0, + 1369.0, + 1444.0, + 1521.0, + 1600.0, + 1681.0, + 1764.0, + 1849.0, + 1936.0, + ] + ), + ) + + result = s.get(25, 0) + expected = 43 + assert result == expected + + # GH 7407 + # with a boolean accessor + df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3}) + vc = df.i.value_counts() + result = vc.get(99, default="Missing") + assert result == "Missing" + + vc = df.b.value_counts() + result = vc.get(False, default="Missing") + assert result == 3 + + result = vc.get(True, default="Missing") + assert result == "Missing" + + +def test_get_nan(): + # GH 8569 + s = pd.Float64Index(range(10)).to_series() + assert s.get(np.nan) is None + assert s.get(np.nan, default="Missing") == "Missing" + + +def test_get_nan_multiple(): + # GH 8569 + # ensure that fixing "test_get_nan" above hasn't broken get + # with multiple elements + s = pd.Float64Index(range(10)).to_series() + + idx = [2, 30] + assert s.get(idx) is None + + idx = [2, np.nan] + assert s.get(idx) is None + + # GH 17295 - all missing keys + idx = [20, 30] + assert s.get(idx) is None + + idx = [np.nan, np.nan] + assert s.get(idx) is None diff --git a/pandas/tests/series/indexing/test_indexing.py b/pandas/tests/series/indexing/test_indexing.py index 18dbd22b73b35..d2a09efd01331 100644 --- a/pandas/tests/series/indexing/test_indexing.py +++ b/pandas/tests/series/indexing/test_indexing.py @@ -883,41 +883,6 @@ def test_pop(): tm.assert_series_equal(k, expected) -def test_take(): - s = Series([-1, 5, 6, 2, 4]) - - actual = s.take([1, 3, 4]) - expected = Series([5, 2, 4], index=[1, 3, 4]) - tm.assert_series_equal(actual, expected) - - actual = s.take([-1, 3, 4]) - expected = Series([4, 2, 4], index=[4, 3, 4]) - tm.assert_series_equal(actual, expected) - - msg = "index {} is out of bounds for( axis 0 with)? size 5" - with pytest.raises(IndexError, match=msg.format(10)): - s.take([1, 10]) - with pytest.raises(IndexError, match=msg.format(5)): - s.take([2, 5]) - - -def test_take_categorical(): - # https://github.com/pandas-dev/pandas/issues/20664 - s = Series(pd.Categorical(["a", "b", "c"])) - result = s.take([-2, -2, 0]) - expected = Series( - pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0] - ) - tm.assert_series_equal(result, expected) - - -def test_head_tail(string_series): - tm.assert_series_equal(string_series.head(), string_series[:5]) - tm.assert_series_equal(string_series.head(0), string_series[0:0]) - tm.assert_series_equal(string_series.tail(), string_series[-5:]) - tm.assert_series_equal(string_series.tail(0), string_series[0:0]) - - def test_uint_drop(any_int_dtype): # see GH18311 # assigning series.loc[0] = 4 changed series.dtype to int diff --git a/pandas/tests/series/indexing/test_mask.py b/pandas/tests/series/indexing/test_mask.py new file mode 100644 index 0000000000000..dc4fb530dbb52 --- /dev/null +++ b/pandas/tests/series/indexing/test_mask.py @@ -0,0 +1,65 @@ +import numpy as np +import pytest + +from pandas import Series +import pandas._testing as tm + + +def test_mask(): + # compare with tested results in test_where + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(~cond, np.nan) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.where(~cond) + rs2 = s.mask(cond) + tm.assert_series_equal(rs, rs2) + + rs = s.where(~cond, -s) + rs2 = s.mask(cond, -s) + tm.assert_series_equal(rs, rs2) + + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + rs = s2.where(~cond[:3]) + rs2 = s2.mask(cond[:3]) + tm.assert_series_equal(rs, rs2) + + rs = s2.where(~cond[:3], -s2) + rs2 = s2.mask(cond[:3], -s2) + tm.assert_series_equal(rs, rs2) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.mask(1) + with pytest.raises(ValueError, match=msg): + s.mask(cond[:3].values, -s) + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.mask(s > 2, np.nan) + expected = Series([1, 2, np.nan, np.nan]) + tm.assert_series_equal(result, expected) + + # see gh-21891 + s = Series([1, 2]) + res = s.mask([True, False]) + + exp = Series([np.nan, 2]) + tm.assert_series_equal(res, exp) + + +def test_mask_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + rs.mask(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[~cond]) + tm.assert_series_equal(rs, s.mask(cond)) + + rs = s.copy() + rs.mask(cond, -s, inplace=True) + tm.assert_series_equal(rs, s.mask(cond, -s)) diff --git a/pandas/tests/series/indexing/test_numeric.py b/pandas/tests/series/indexing/test_numeric.py index 3684ca00c2f17..176af6eda2d76 100644 --- a/pandas/tests/series/indexing/test_numeric.py +++ b/pandas/tests/series/indexing/test_numeric.py @@ -1,141 +1,10 @@ import numpy as np import pytest -import pandas as pd from pandas import DataFrame, Index, Series import pandas._testing as tm -def test_get(): - # GH 6383 - s = Series( - np.array( - [ - 43, - 48, - 60, - 48, - 50, - 51, - 50, - 45, - 57, - 48, - 56, - 45, - 51, - 39, - 55, - 43, - 54, - 52, - 51, - 54, - ] - ) - ) - - result = s.get(25, 0) - expected = 0 - assert result == expected - - s = Series( - np.array( - [ - 43, - 48, - 60, - 48, - 50, - 51, - 50, - 45, - 57, - 48, - 56, - 45, - 51, - 39, - 55, - 43, - 54, - 52, - 51, - 54, - ] - ), - index=pd.Float64Index( - [ - 25.0, - 36.0, - 49.0, - 64.0, - 81.0, - 100.0, - 121.0, - 144.0, - 169.0, - 196.0, - 1225.0, - 1296.0, - 1369.0, - 1444.0, - 1521.0, - 1600.0, - 1681.0, - 1764.0, - 1849.0, - 1936.0, - ] - ), - ) - - result = s.get(25, 0) - expected = 43 - assert result == expected - - # GH 7407 - # with a boolean accessor - df = pd.DataFrame({"i": [0] * 3, "b": [False] * 3}) - vc = df.i.value_counts() - result = vc.get(99, default="Missing") - assert result == "Missing" - - vc = df.b.value_counts() - result = vc.get(False, default="Missing") - assert result == 3 - - result = vc.get(True, default="Missing") - assert result == "Missing" - - -def test_get_nan(): - # GH 8569 - s = pd.Float64Index(range(10)).to_series() - assert s.get(np.nan) is None - assert s.get(np.nan, default="Missing") == "Missing" - - -def test_get_nan_multiple(): - # GH 8569 - # ensure that fixing "test_get_nan" above hasn't broken get - # with multiple elements - s = pd.Float64Index(range(10)).to_series() - - idx = [2, 30] - assert s.get(idx) is None - - idx = [2, np.nan] - assert s.get(idx) is None - - # GH 17295 - all missing keys - idx = [20, 30] - assert s.get(idx) is None - - idx = [np.nan, np.nan] - assert s.get(idx) is None - - def test_delitem(): # GH 5542 # should delete the item inplace diff --git a/pandas/tests/series/indexing/test_take.py b/pandas/tests/series/indexing/test_take.py new file mode 100644 index 0000000000000..9368d49e5ff2b --- /dev/null +++ b/pandas/tests/series/indexing/test_take.py @@ -0,0 +1,33 @@ +import pytest + +import pandas as pd +from pandas import Series +import pandas._testing as tm + + +def test_take(): + ser = Series([-1, 5, 6, 2, 4]) + + actual = ser.take([1, 3, 4]) + expected = Series([5, 2, 4], index=[1, 3, 4]) + tm.assert_series_equal(actual, expected) + + actual = ser.take([-1, 3, 4]) + expected = Series([4, 2, 4], index=[4, 3, 4]) + tm.assert_series_equal(actual, expected) + + msg = "index {} is out of bounds for( axis 0 with)? size 5" + with pytest.raises(IndexError, match=msg.format(10)): + ser.take([1, 10]) + with pytest.raises(IndexError, match=msg.format(5)): + ser.take([2, 5]) + + +def test_take_categorical(): + # https://github.com/pandas-dev/pandas/issues/20664 + ser = Series(pd.Categorical(["a", "b", "c"])) + result = ser.take([-2, -2, 0]) + expected = Series( + pd.Categorical(["b", "b", "a"], categories=["a", "b", "c"]), index=[1, 1, 0] + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_where.py b/pandas/tests/series/indexing/test_where.py new file mode 100644 index 0000000000000..9703f5afaf689 --- /dev/null +++ b/pandas/tests/series/indexing/test_where.py @@ -0,0 +1,437 @@ +import numpy as np +import pytest + +from pandas.core.dtypes.common import is_integer + +import pandas as pd +from pandas import Series, Timestamp, date_range, isna +import pandas._testing as tm + + +def test_where_unsafe_int(sint_dtype): + s = Series(np.arange(10), dtype=sint_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype=sint_dtype) + + tm.assert_series_equal(s, expected) + + +def test_where_unsafe_float(float_dtype): + s = Series(np.arange(10), dtype=float_dtype) + mask = s < 5 + + s[mask] = range(2, 7) + data = list(range(2, 7)) + list(range(5, 10)) + expected = Series(data, dtype=float_dtype) + + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize( + "dtype,expected_dtype", + [ + (np.int8, np.float64), + (np.int16, np.float64), + (np.int32, np.float64), + (np.int64, np.float64), + (np.float32, np.float32), + (np.float64, np.float64), + ], +) +def test_where_unsafe_upcast(dtype, expected_dtype): + # see gh-9743 + s = Series(np.arange(10), dtype=dtype) + values = [2.5, 3.5, 4.5, 5.5, 6.5] + mask = s < 5 + expected = Series(values + list(range(5, 10)), dtype=expected_dtype) + s[mask] = values + tm.assert_series_equal(s, expected) + + +def test_where_unsafe(): + # see gh-9731 + s = Series(np.arange(10), dtype="int64") + values = [2.5, 3.5, 4.5, 5.5] + + mask = s > 5 + expected = Series(list(range(6)) + values, dtype="float64") + + s[mask] = values + tm.assert_series_equal(s, expected) + + # see gh-3235 + s = Series(np.arange(10), dtype="int64") + mask = s < 5 + s[mask] = range(2, 7) + expected = Series(list(range(2, 7)) + list(range(5, 10)), dtype="int64") + tm.assert_series_equal(s, expected) + assert s.dtype == expected.dtype + + s = Series(np.arange(10), dtype="int64") + mask = s > 5 + s[mask] = [0] * 4 + expected = Series([0, 1, 2, 3, 4, 5] + [0] * 4, dtype="int64") + tm.assert_series_equal(s, expected) + + s = Series(np.arange(10)) + mask = s > 5 + + msg = "cannot assign mismatch length to masked array" + with pytest.raises(ValueError, match=msg): + s[mask] = [5, 4, 3, 2, 1] + + with pytest.raises(ValueError, match=msg): + s[mask] = [0] * 5 + + # dtype changes + s = Series([1, 2, 3, 4]) + result = s.where(s > 2, np.nan) + expected = Series([np.nan, np.nan, 3, 4]) + tm.assert_series_equal(result, expected) + + # GH 4667 + # setting with None changes dtype + s = Series(range(10)).astype(float) + s[8] = None + result = s[8] + assert isna(result) + + s = Series(range(10)).astype(float) + s[s > 8] = None + result = s[isna(s)] + expected = Series(np.nan, index=[9]) + tm.assert_series_equal(result, expected) + + +def test_where(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.where(cond).dropna() + rs2 = s[cond] + tm.assert_series_equal(rs, rs2) + + rs = s.where(cond, -s) + tm.assert_series_equal(rs, s.abs()) + + rs = s.where(cond) + assert s.shape == rs.shape + assert rs is not s + + # test alignment + cond = Series([True, False, False, True, False], index=s.index) + s2 = -(s.abs()) + + expected = s2[cond].reindex(s2.index[:3]).reindex(s2.index) + rs = s2.where(cond[:3]) + tm.assert_series_equal(rs, expected) + + expected = s2.abs() + expected.iloc[0] = s2[0] + rs = s2.where(cond[:3], -s2) + tm.assert_series_equal(rs, expected) + + +def test_where_error(): + s = Series(np.random.randn(5)) + cond = s > 0 + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where(1) + with pytest.raises(ValueError, match=msg): + s.where(cond[:3].values, -s) + + # GH 2745 + s = Series([1, 2]) + s[[True, False]] = [0, 1] + expected = Series([0, 2]) + tm.assert_series_equal(s, expected) + + # failures + msg = "cannot assign mismatch length to masked array" + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [0, 2, 3] + msg = ( + "NumPy boolean array indexing assignment cannot assign 0 input " + "values to the 1 output values where the mask is true" + ) + with pytest.raises(ValueError, match=msg): + s[[True, False]] = [] + + +@pytest.mark.parametrize("klass", [list, tuple, np.array, Series]) +def test_where_array_like(klass): + # see gh-15414 + s = Series([1, 2, 3]) + cond = [False, True, True] + expected = Series([np.nan, 2, 3]) + + result = s.where(klass(cond)) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize( + "cond", + [ + [1, 0, 1], + Series([2, 5, 7]), + ["True", "False", "True"], + [Timestamp("2017-01-01"), pd.NaT, Timestamp("2017-01-02")], + ], +) +def test_where_invalid_input(cond): + # see gh-15414: only boolean arrays accepted + s = Series([1, 2, 3]) + msg = "Boolean array expected for the condition" + + with pytest.raises(ValueError, match=msg): + s.where(cond) + + msg = "Array conditional must be same shape as self" + with pytest.raises(ValueError, match=msg): + s.where([True]) + + +def test_where_ndframe_align(): + msg = "Array conditional must be same shape as self" + s = Series([1, 2, 3]) + + cond = [True] + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([1, np.nan, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + cond = np.array([False, True, False, True]) + with pytest.raises(ValueError, match=msg): + s.where(cond) + + expected = Series([np.nan, 2, np.nan]) + + out = s.where(Series(cond)) + tm.assert_series_equal(out, expected) + + +def test_where_setitem_invalid(): + # GH 2702 + # make sure correct exceptions are raised on invalid list assignment + + msg = "cannot set using a {} indexer with a different length than the value" + + # slice + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[0:3] = list(range(27)) + + s[0:3] = list(range(3)) + expected = Series([0, 1, 2]) + tm.assert_series_equal(s.astype(np.int64), expected) + + # slice with step + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[0:4:2] = list(range(27)) + + s = Series(list("abcdef")) + s[0:4:2] = list(range(2)) + expected = Series([0, "b", 1, "d", "e", "f"]) + tm.assert_series_equal(s, expected) + + # neg slices + s = Series(list("abcdef")) + + with pytest.raises(ValueError, match=msg.format("slice")): + s[:-1] = list(range(27)) + + s[-3:-1] = list(range(2)) + expected = Series(["a", "b", "c", 0, 1, "f"]) + tm.assert_series_equal(s, expected) + + # list + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("list-like")): + s[[0, 1, 2]] = list(range(27)) + + s = Series(list("abc")) + + with pytest.raises(ValueError, match=msg.format("list-like")): + s[[0, 1, 2]] = list(range(2)) + + # scalar + s = Series(list("abc")) + s[0] = list(range(10)) + expected = Series([list(range(10)), "b", "c"]) + tm.assert_series_equal(s, expected) + + +@pytest.mark.parametrize("size", range(2, 6)) +@pytest.mark.parametrize( + "mask", [[True, False, False, False, False], [True, False], [False]] +) +@pytest.mark.parametrize( + "item", [2.0, np.nan, np.finfo(np.float).max, np.finfo(np.float).min] +) +# Test numpy arrays, lists and tuples as the input to be +# broadcast +@pytest.mark.parametrize( + "box", [lambda x: np.array([x]), lambda x: [x], lambda x: (x,)] +) +def test_broadcast(size, mask, item, box): + selection = np.resize(mask, size) + + data = np.arange(size, dtype=float) + + # Construct the expected series by taking the source + # data or item based on the selection + expected = Series( + [item if use_item else data[i] for i, use_item in enumerate(selection)] + ) + + s = Series(data) + s[selection] = box(item) + tm.assert_series_equal(s, expected) + + s = Series(data) + result = s.where(~selection, box(item)) + tm.assert_series_equal(result, expected) + + s = Series(data) + result = s.mask(selection, box(item)) + tm.assert_series_equal(result, expected) + + +def test_where_inplace(): + s = Series(np.random.randn(5)) + cond = s > 0 + + rs = s.copy() + + rs.where(cond, inplace=True) + tm.assert_series_equal(rs.dropna(), s[cond]) + tm.assert_series_equal(rs, s.where(cond)) + + rs = s.copy() + rs.where(cond, -s, inplace=True) + tm.assert_series_equal(rs, s.where(cond, -s)) + + +def test_where_dups(): + # GH 4550 + # where crashes with dups in index + s1 = Series(list(range(3))) + s2 = Series(list(range(3))) + comb = pd.concat([s1, s2]) + result = comb.where(comb < 2) + expected = Series([0, 1, np.nan, 0, 1, np.nan], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(result, expected) + + # GH 4548 + # inplace updating not working with dups + comb[comb < 1] = 5 + expected = Series([5, 1, 2, 5, 1, 2], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(comb, expected) + + comb[comb < 2] += 10 + expected = Series([5, 11, 2, 5, 11, 2], index=[0, 1, 2, 0, 1, 2]) + tm.assert_series_equal(comb, expected) + + +def test_where_numeric_with_string(): + # GH 9280 + s = pd.Series([1, 2, 3]) + w = s.where(s > 1, "X") + + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + w = s.where(s > 1, ["X", "Y", "Z"]) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + w = s.where(s > 1, np.array(["X", "Y", "Z"])) + assert not is_integer(w[0]) + assert is_integer(w[1]) + assert is_integer(w[2]) + assert isinstance(w[0], str) + assert w.dtype == "object" + + +def test_where_timedelta_coerce(): + s = Series([1, 2], dtype="timedelta64[ns]") + expected = Series([10, 10]) + mask = np.array([False, False]) + + rs = s.where(mask, [10, 10]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype="object") + tm.assert_series_equal(rs, expected) + + +def test_where_datetime_conversion(): + s = Series(date_range("20130102", periods=2)) + expected = Series([10, 10]) + mask = np.array([False, False]) + + rs = s.where(mask, [10, 10]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, 10.0) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, 10.0]) + tm.assert_series_equal(rs, expected) + + rs = s.where(mask, [10.0, np.nan]) + expected = Series([10, None], dtype="object") + tm.assert_series_equal(rs, expected) + + # GH 15701 + timestamps = ["2016-12-31 12:00:04+00:00", "2016-12-31 12:00:04.010000+00:00"] + s = Series([pd.Timestamp(t) for t in timestamps]) + rs = s.where(Series([False, True])) + expected = Series([pd.NaT, s[1]]) + tm.assert_series_equal(rs, expected) + + +def test_where_dt_tz_values(tz_naive_fixture): + ser1 = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20150103"], tz=tz_naive_fixture) + ) + ser2 = pd.Series( + pd.DatetimeIndex(["20160514", "20160515", "20160516"], tz=tz_naive_fixture) + ) + mask = pd.Series([True, True, False]) + result = ser1.where(mask, ser2) + exp = pd.Series( + pd.DatetimeIndex(["20150101", "20150102", "20160516"], tz=tz_naive_fixture) + ) + tm.assert_series_equal(exp, result) diff --git a/pandas/tests/series/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py similarity index 100% rename from pandas/tests/series/test_convert_dtypes.py rename to pandas/tests/series/methods/test_convert_dtypes.py diff --git a/pandas/tests/series/methods/test_head_tail.py b/pandas/tests/series/methods/test_head_tail.py new file mode 100644 index 0000000000000..d9f8d85eda350 --- /dev/null +++ b/pandas/tests/series/methods/test_head_tail.py @@ -0,0 +1,8 @@ +import pandas._testing as tm + + +def test_head_tail(string_series): + tm.assert_series_equal(string_series.head(), string_series[:5]) + tm.assert_series_equal(string_series.head(0), string_series[0:0]) + tm.assert_series_equal(string_series.tail(), string_series[-5:]) + tm.assert_series_equal(string_series.tail(0), string_series[0:0]) diff --git a/pandas/tests/series/test_reshaping.py b/pandas/tests/series/methods/test_unstack.py similarity index 100% rename from pandas/tests/series/test_reshaping.py rename to pandas/tests/series/methods/test_unstack.py diff --git a/pandas/tests/series/test_combine_concat.py b/pandas/tests/series/test_combine_concat.py index 239353d3955b4..4cb471597b67a 100644 --- a/pandas/tests/series/test_combine_concat.py +++ b/pandas/tests/series/test_combine_concat.py @@ -4,7 +4,7 @@ import pytest import pandas as pd -from pandas import DataFrame, Series +from pandas import DataFrame, Series, to_datetime import pandas._testing as tm @@ -252,7 +252,6 @@ def test_concat_empty_series_dtypes(self): assert result.dtype == expected def test_combine_first_dt64(self): - from pandas.core.tools.datetimes import to_datetime s0 = to_datetime(Series(["2010", np.NaN])) s1 = to_datetime(Series([np.NaN, "2011"]))