diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index 55465dffd2027..e1ce10970f07b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -185,6 +185,53 @@ def test_setitem_extension_types(self, obj, dtype): tm.assert_frame_equal(df, expected) + def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self): + # GH#7492 + data_ns = np.array([1, "nat"], dtype="datetime64[ns]") + result = Series(data_ns).to_frame() + result["new"] = data_ns + expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + # OutOfBoundsDatetime error shouldn't occur + data_s = np.array([1, "nat"], dtype="datetime64[s]") + result["new"] = data_s + expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into a not-yet-existing column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df[unit] = vals + + assert df[unit].dtype == np.dtype("M8[ns]") + assert (df[unit].values == ex_vals).all() + + @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) + def test_frame_setitem_existing_datetime64_col_other_units(self, unit): + # Check that non-nano dt64 values get cast to dt64 on setitem + # into an already-existing dt64 column + n = 100 + + dtype = np.dtype(f"M8[{unit}]") + vals = np.arange(n, dtype=np.int64).view(dtype) + ex_vals = vals.astype("datetime64[ns]") + + df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) + df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]") + + # We overwrite existing dt64 column with new, non-nano dt64 vals + df["dates"] = vals + assert (df["dates"].values == ex_vals).all() + def test_setitem_dt64tz(self, timezone_frame): df = timezone_frame diff --git a/pandas/tests/frame/test_add_prefix_suffix.py b/pandas/tests/frame/methods/test_add_prefix_suffix.py similarity index 100% rename from pandas/tests/frame/test_add_prefix_suffix.py rename to pandas/tests/frame/methods/test_add_prefix_suffix.py diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 92c9f7564a670..56fd633f5f22b 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -71,6 +71,15 @@ def test_reset_index_tz(self, tz_aware_fixture): expected["idx"] = expected["idx"].apply(lambda d: Timestamp(d, tz=tz)) tm.assert_frame_equal(df.reset_index(), expected) + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_frame_reset_index_tzaware_index(self, tz): + dr = date_range("2012-06-02", periods=10, tz=tz) + df = DataFrame(np.random.randn(len(dr)), dr) + roundtripped = df.reset_index().set_index("index") + xp = df.index.tz + rs = roundtripped.index.tz + assert xp == rs + def test_reset_index_with_intervals(self): idx = IntervalIndex.from_breaks(np.arange(11), name="x") original = DataFrame({"x": idx, "y": np.arange(10)})[["x", "y"]] diff --git a/pandas/tests/frame/methods/test_transpose.py b/pandas/tests/frame/methods/test_transpose.py index a5fe5f3a6d5e4..8635168f1eb03 100644 --- a/pandas/tests/frame/methods/test_transpose.py +++ b/pandas/tests/frame/methods/test_transpose.py @@ -1,45 +1,55 @@ import numpy as np +import pytest -import pandas as pd +from pandas import DataFrame, date_range import pandas._testing as tm class TestTranspose: def test_transpose_tzaware_1col_single_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") - df = pd.DataFrame(dti) + df = DataFrame(dti) assert (df.dtypes == dti.dtype).all() res = df.T assert (res.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_single_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") - df3 = pd.DataFrame({"A": dti, "B": dti}) + df3 = DataFrame({"A": dti, "B": dti}) assert (df3.dtypes == dti.dtype).all() res3 = df3.T assert (res3.dtypes == dti.dtype).all() def test_transpose_tzaware_2col_mixed_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") dti2 = dti.tz_convert("US/Pacific") - df4 = pd.DataFrame({"A": dti, "B": dti2}) + df4 = DataFrame({"A": dti, "B": dti2}) assert (df4.dtypes == [dti.dtype, dti2.dtype]).all() assert (df4.T.dtypes == object).all() tm.assert_frame_equal(df4.T.T, df4) + @pytest.mark.parametrize("tz", [None, "America/New_York"]) + def test_transpose_preserves_dtindex_equality_with_dst(self, tz): + # GH#19970 + idx = date_range("20161101", "20161130", freq="4H", tz=tz) + df = DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list("ab"), columns=idx) + tm.assert_frame_equal(result, expected) + def test_transpose_object_to_tzaware_mixed_tz(self): # GH#26825 - dti = pd.date_range("2016-04-05 04:30", periods=3, tz="UTC") + dti = date_range("2016-04-05 04:30", periods=3, tz="UTC") dti2 = dti.tz_convert("US/Pacific") # mixed all-tzaware dtypes - df2 = pd.DataFrame([dti, dti2]) + df2 = DataFrame([dti, dti2]) assert (df2.dtypes == object).all() res2 = df2.T assert (res2.dtypes == [dti.dtype, dti2.dtype]).all() @@ -47,7 +57,7 @@ def test_transpose_object_to_tzaware_mixed_tz(self): def test_transpose_uint64(self, uint64_frame): result = uint64_frame.T - expected = pd.DataFrame(uint64_frame.values.T) + expected = DataFrame(uint64_frame.values.T) expected.index = ["A", "B"] tm.assert_frame_equal(result, expected) @@ -63,7 +73,7 @@ def test_transpose_float(self, float_frame): # mixed type index, data = tm.getMixedTypeDict() - mixed = pd.DataFrame(data, index=index) + mixed = DataFrame(data, index=index) mixed_T = mixed.T for col, s in mixed_T.items(): diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 46e34a7a58ae4..408024e48a35a 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2737,6 +2737,35 @@ def test_constructor_list_str_na(self, string_dtype): class TestDataFrameConstructorWithDatetimeTZ: + @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) + def test_construction_preserves_tzaware_dtypes(self, tz): + # after GH#7822 + # these retain the timezones on dict construction + dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") + dr_tz = dr.tz_localize(tz) + df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) + tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) + assert df["B"].dtype == tz_expected + + # GH#2810 (with timezones) + datetimes_naive = [ts.to_pydatetime() for ts in dr] + datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] + df = DataFrame({"dr": dr}) + df["dr_tz"] = dr_tz + df["datetimes_naive"] = datetimes_naive + df["datetimes_with_tz"] = datetimes_with_tz + result = df.dtypes + expected = Series( + [ + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + np.dtype("datetime64[ns]"), + DatetimeTZDtype(tz=tz), + ], + index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], + ) + tm.assert_series_equal(result, expected) + def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture): # GH#25843 tz = tz_aware_fixture diff --git a/pandas/tests/frame/test_nonunique_indexes.py b/pandas/tests/frame/test_nonunique_indexes.py index c6b1c69442dbc..1c54855ee7bce 100644 --- a/pandas/tests/frame/test_nonunique_indexes.py +++ b/pandas/tests/frame/test_nonunique_indexes.py @@ -400,29 +400,6 @@ def check(result, expected=None): result = z.loc[["a", "c", "a"]] check(result, expected) - def test_column_dups_indexing2(self): - - # GH 8363 - # datetime ops with a non-unique index - df = DataFrame( - {"A": np.arange(5, dtype="int64"), "B": np.arange(1, 6, dtype="int64")}, - index=[2, 2, 3, 3, 4], - ) - result = df.B - df.A - expected = Series(1, index=[2, 2, 3, 3, 4]) - tm.assert_series_equal(result, expected) - - df = DataFrame( - { - "A": date_range("20130101", periods=5), - "B": date_range("20130101 09:00:00", periods=5), - }, - index=[2, 2, 3, 3, 4], - ) - result = df.B - df.A - expected = Series(pd.Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) - tm.assert_series_equal(result, expected) - def test_columns_with_dups(self): # GH 3468 related diff --git a/pandas/tests/frame/test_timeseries.py b/pandas/tests/frame/test_timeseries.py deleted file mode 100644 index 22ffb30324366..0000000000000 --- a/pandas/tests/frame/test_timeseries.py +++ /dev/null @@ -1,57 +0,0 @@ -import numpy as np -import pytest - -import pandas as pd -from pandas import DataFrame, to_datetime -import pandas._testing as tm - - -class TestDataFrameTimeSeriesMethods: - @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) - def test_frame_append_datetime64_col_other_units(self, unit): - n = 100 - - ns_dtype = np.dtype("M8[ns]") - - dtype = np.dtype(f"M8[{unit}]") - vals = np.arange(n, dtype=np.int64).view(dtype) - - df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) - df[unit] = vals - - ex_vals = to_datetime(vals.astype("O")).values - - assert df[unit].dtype == ns_dtype - assert (df[unit].values == ex_vals).all() - - @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"]) - def test_frame_setitem_existing_datetime64_col_other_units(self, unit): - # Test insertion into existing datetime64 column - n = 100 - ns_dtype = np.dtype("M8[ns]") - - df = DataFrame({"ints": np.arange(n)}, index=np.arange(n)) - df["dates"] = np.arange(n, dtype=np.int64).view(ns_dtype) - - dtype = np.dtype(f"M8[{unit}]") - vals = np.arange(n, dtype=np.int64).view(dtype) - - tmp = df.copy() - - tmp["dates"] = vals - ex_vals = to_datetime(vals.astype("O")).values - - assert (tmp["dates"].values == ex_vals).all() - - def test_datetime_assignment_with_NaT_and_diff_time_units(self): - # GH 7492 - data_ns = np.array([1, "nat"], dtype="datetime64[ns]") - result = pd.Series(data_ns).to_frame() - result["new"] = data_ns - expected = DataFrame({0: [1, None], "new": [1, None]}, dtype="datetime64[ns]") - tm.assert_frame_equal(result, expected) - # OutOfBoundsDatetime error shouldn't occur - data_s = np.array([1, "nat"], dtype="datetime64[s]") - result["new"] = data_s - expected = DataFrame({0: [1, None], "new": [1e9, None]}, dtype="datetime64[ns]") - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py deleted file mode 100644 index 1271a490d6b70..0000000000000 --- a/pandas/tests/frame/test_timezones.py +++ /dev/null @@ -1,60 +0,0 @@ -""" -Tests for DataFrame timezone-related methods -""" -import numpy as np -import pytest - -from pandas.core.dtypes.dtypes import DatetimeTZDtype - -from pandas import DataFrame, Series -import pandas._testing as tm -from pandas.core.indexes.datetimes import date_range - - -class TestDataFrameTimezones: - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_frame_no_datetime64_dtype(self, tz): - # after GH#7822 - # these retain the timezones on dict construction - dr = date_range("2011/1/1", "2012/1/1", freq="W-FRI") - dr_tz = dr.tz_localize(tz) - df = DataFrame({"A": "foo", "B": dr_tz}, index=dr) - tz_expected = DatetimeTZDtype("ns", dr_tz.tzinfo) - assert df["B"].dtype == tz_expected - - # GH#2810 (with timezones) - datetimes_naive = [ts.to_pydatetime() for ts in dr] - datetimes_with_tz = [ts.to_pydatetime() for ts in dr_tz] - df = DataFrame({"dr": dr}) - df["dr_tz"] = dr_tz - df["datetimes_naive"] = datetimes_naive - df["datetimes_with_tz"] = datetimes_with_tz - result = df.dtypes - expected = Series( - [ - np.dtype("datetime64[ns]"), - DatetimeTZDtype(tz=tz), - np.dtype("datetime64[ns]"), - DatetimeTZDtype(tz=tz), - ], - index=["dr", "dr_tz", "datetimes_naive", "datetimes_with_tz"], - ) - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("tz", ["US/Eastern", "dateutil/US/Eastern"]) - def test_frame_reset_index(self, tz): - dr = date_range("2012-06-02", periods=10, tz=tz) - df = DataFrame(np.random.randn(len(dr)), dr) - roundtripped = df.reset_index().set_index("index") - xp = df.index.tz - rs = roundtripped.index.tz - assert xp == rs - - @pytest.mark.parametrize("tz", [None, "America/New_York"]) - def test_boolean_compare_transpose_tzindex_with_dst(self, tz): - # GH 19970 - idx = date_range("20161101", "20161130", freq="4H", tz=tz) - df = DataFrame({"a": range(len(idx)), "b": range(len(idx))}, index=idx) - result = df.T == df.T - expected = DataFrame(True, index=list("ab"), columns=idx) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/indexing/test_getitem.py b/pandas/tests/series/indexing/test_getitem.py index f8517c3b91fc1..5e87f8f6c1059 100644 --- a/pandas/tests/series/indexing/test_getitem.py +++ b/pandas/tests/series/indexing/test_getitem.py @@ -271,6 +271,21 @@ def test_getitem_boolean_different_order(self, string_series): exp = string_series[string_series > 0] tm.assert_series_equal(sel, exp) + def test_getitem_boolean_contiguous_preserve_freq(self): + rng = date_range("1/1/2000", "3/1/2000", freq="B") + + mask = np.zeros(len(rng), dtype=bool) + mask[10:20] = True + + masked = rng[mask] + expected = rng[10:20] + assert expected.freq == rng.freq + tm.assert_index_equal(masked, expected) + + mask[22] = True + masked = rng[mask] + assert masked.freq is None + class TestGetitemCallable: def test_getitem_callable(self): diff --git a/pandas/tests/series/methods/test_is_monotonic.py b/pandas/tests/series/methods/test_is_monotonic.py new file mode 100644 index 0000000000000..b242b293cb59e --- /dev/null +++ b/pandas/tests/series/methods/test_is_monotonic.py @@ -0,0 +1,25 @@ +import numpy as np + +from pandas import Series, date_range + + +class TestIsMonotonic: + def test_is_monotonic_numeric(self): + + ser = Series(np.random.randint(0, 10, size=1000)) + assert not ser.is_monotonic + ser = Series(np.arange(1000)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + ser = Series(np.arange(1000, 0, -1)) + assert ser.is_monotonic_decreasing is True + + def test_is_monotonic_dt64(self): + + ser = Series(date_range("20130101", periods=10)) + assert ser.is_monotonic is True + assert ser.is_monotonic_increasing is True + + ser = Series(list(reversed(ser))) + assert ser.is_monotonic is False + assert ser.is_monotonic_decreasing is True diff --git a/pandas/tests/series/methods/test_view.py b/pandas/tests/series/methods/test_view.py new file mode 100644 index 0000000000000..ccf3aa0d90e6f --- /dev/null +++ b/pandas/tests/series/methods/test_view.py @@ -0,0 +1,18 @@ +from pandas import Series, date_range +import pandas._testing as tm + + +class TestView: + def test_view_tz(self): + # GH#24024 + ser = Series(date_range("2000", periods=4, tz="US/Central")) + result = ser.view("i8") + expected = Series( + [ + 946706400000000000, + 946792800000000000, + 946879200000000000, + 946965600000000000, + ] + ) + tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py deleted file mode 100644 index ebb75adde5b13..0000000000000 --- a/pandas/tests/series/test_analytics.py +++ /dev/null @@ -1,23 +0,0 @@ -import numpy as np - -import pandas as pd -from pandas import Series - - -class TestSeriesAnalytics: - def test_is_monotonic(self): - - s = Series(np.random.randint(0, 10, size=1000)) - assert not s.is_monotonic - s = Series(np.arange(1000)) - assert s.is_monotonic is True - assert s.is_monotonic_increasing is True - s = Series(np.arange(1000, 0, -1)) - assert s.is_monotonic_decreasing is True - - s = Series(pd.date_range("20130101", periods=10)) - assert s.is_monotonic is True - assert s.is_monotonic_increasing is True - s = Series(list(reversed(s.tolist()))) - assert s.is_monotonic is False - assert s.is_monotonic_decreasing is True diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index 4920796f661fb..c595861d35934 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -15,6 +15,7 @@ Index, IntervalIndex, Series, + Timedelta, bdate_range, date_range, isna, @@ -277,6 +278,25 @@ def test_alignment_doesnt_change_tz(self): assert ser.index is dti assert ser_utc.index is dti_utc + def test_arithmetic_with_duplicate_index(self): + + # GH#8363 + # integer ops with a non-unique index + index = [2, 2, 3, 3, 4] + ser = Series(np.arange(1, 6, dtype="int64"), index=index) + other = Series(np.arange(5, dtype="int64"), index=index) + result = ser - other + expected = Series(1, index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + + # GH#8363 + # datetime ops with a non-unique index + ser = Series(date_range("20130101 09:00:00", periods=5), index=index) + other = Series(date_range("20130101", periods=5), index=index) + result = ser - other + expected = Series(Timedelta("9 hours"), index=[2, 2, 3, 3, 4]) + tm.assert_series_equal(result, expected) + # ------------------------------------------------------------------ # Comparisons diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5c4118bc40f4d..c8fbbcf9aed20 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1045,6 +1045,16 @@ def test_constructor_infer_period(self, data_constructor): tm.assert_series_equal(result, expected) assert result.dtype == "Period[D]" + @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") + def test_construct_from_ints_including_iNaT_scalar_period_dtype(self): + series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") + + val = series[3] + assert isna(val) + + series[2] = val + assert isna(series[2]) + def test_constructor_period_incompatible_frequency(self): data = [pd.Period("2000", "D"), pd.Period("2001", "A")] result = Series(data) diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py index d079111aa12d6..17dbfa9cf379a 100644 --- a/pandas/tests/series/test_period.py +++ b/pandas/tests/series/test_period.py @@ -1,36 +1,24 @@ import numpy as np -import pytest -import pandas as pd from pandas import DataFrame, Series, period_range class TestSeriesPeriod: - def setup_method(self, method): - self.series = Series(period_range("2000-01-01", periods=10, freq="D")) # --------------------------------------------------------------------- # NaT support - @pytest.mark.xfail(reason="PeriodDtype Series not supported yet") - def test_NaT_scalar(self): - series = Series([0, 1000, 2000, pd._libs.iNaT], dtype="period[D]") - - val = series[3] - assert pd.isna(val) - - series[2] = val - assert pd.isna(series[2]) - def test_intercept_astype_object(self): - expected = self.series.astype("object") + series = Series(period_range("2000-01-01", periods=10, freq="D")) + + expected = series.astype("object") - df = DataFrame({"a": self.series, "b": np.random.randn(len(self.series))}) + df = DataFrame({"a": series, "b": np.random.randn(len(series))}) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() - df = DataFrame({"a": self.series, "b": ["foo"] * len(self.series)}) + df = DataFrame({"a": series, "b": ["foo"] * len(series)}) result = df.values.squeeze() assert (result[:, 0] == expected.values).all() diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py index 8b32be45e8d57..0769606d18d57 100644 --- a/pandas/tests/series/test_timeseries.py +++ b/pandas/tests/series/test_timeseries.py @@ -1,26 +1,10 @@ import numpy as np -import pandas as pd from pandas import DataFrame, Series, date_range, timedelta_range import pandas._testing as tm class TestTimeSeries: - def test_contiguous_boolean_preserve_freq(self): - rng = date_range("1/1/2000", "3/1/2000", freq="B") - - mask = np.zeros(len(rng), dtype=bool) - mask[10:20] = True - - masked = rng[mask] - expected = rng[10:20] - assert expected.freq == rng.freq - tm.assert_index_equal(masked, expected) - - mask[22] = True - masked = rng[mask] - assert masked.freq is None - def test_promote_datetime_date(self): rng = date_range("1/1/2000", periods=20) ts = Series(np.random.randn(20), index=rng) @@ -55,17 +39,3 @@ def f(x): s.map(f) s.apply(f) DataFrame(s).applymap(f) - - def test_view_tz(self): - # GH#24024 - ser = Series(pd.date_range("2000", periods=4, tz="US/Central")) - result = ser.view("i8") - expected = Series( - [ - 946706400000000000, - 946792800000000000, - 946879200000000000, - 946965600000000000, - ] - ) - tm.assert_series_equal(result, expected)