From 67b087f5c07cf624595caca6c3d290503dbf92ff Mon Sep 17 00:00:00 2001
From: Joris Van den Bossche
Date: Tue, 3 Nov 2020 20:18:25 +0100
Subject: [PATCH 01/21] ERR: fix error message in Period for invalid frequency (#37602)

---
 pandas/_libs/tslibs/period.pyx            | 2 +-
 pandas/tests/scalar/period/test_period.py | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx
index b1f9ff71f5faa..b817d80c64ccd 100644
--- a/pandas/_libs/tslibs/period.pyx
+++ b/pandas/_libs/tslibs/period.pyx
@@ -2438,7 +2438,7 @@ cpdef int freq_to_dtype_code(BaseOffset freq) except? -1:
     try:
         return freq._period_dtype_code
     except AttributeError as err:
-        raise ValueError(INVALID_FREQ_ERR_MSG) from err
+        raise ValueError(INVALID_FREQ_ERR_MSG.format(freq)) from err


 cdef int64_t _ordinal_from_fields(int year, int month, quarter, int day,
diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py
index f150e5e5b18b2..46bc6421c2070 100644
--- a/pandas/tests/scalar/period/test_period.py
+++ b/pandas/tests/scalar/period/test_period.py
@@ -1554,3 +1554,9 @@ def test_negone_ordinals():
         repr(period)
     period = Period(ordinal=-1, freq="W")
     repr(period)
+
+
+def test_invalid_frequency_error_message():
+    msg = "Invalid frequency: <WeekOfMonth: week=0, weekday=0>"
+    with pytest.raises(ValueError, match=msg):
+        Period("2012-01-02", freq="WOM-1MON")

From 7d40d3ea53da635f4074ef98f84a3a8c6aa24166 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 3 Nov 2020 15:09:05 -0800
Subject: [PATCH 02/21] CLN: remove rebox_native (#37608)

---
 pandas/core/arrays/datetimelike.py       | 5 +++--
 pandas/core/arrays/datetimes.py          | 9 +++------
 pandas/core/arrays/period.py             | 8 ++------
 pandas/core/arrays/timedeltas.py         | 8 ++------
 pandas/tests/arrays/test_datetimelike.py | 5 +++--
 5 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index 579719d8bac3b..1955a96160a4a 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -151,7 +151,9 @@ def _rebox_native(cls, value: int) -> Union[int, np.datetime64, np.timedelta64]:
         """
         raise AbstractMethodError(cls)

-    def _unbox_scalar(self, value: DTScalarOrNaT, setitem: bool = False) -> int:
+    def _unbox_scalar(
+        self, value: DTScalarOrNaT, setitem: bool = False
+    ) -> Union[np.int64, np.datetime64, np.timedelta64]:
         """
         Unbox the integer value of a scalar `value`.

@@ -636,7 +638,6 @@ def _unbox(
         """
         if lib.is_scalar(other):
             other = self._unbox_scalar(other, setitem=setitem)
-            other = self._rebox_native(other)
         else:
             # same type as self
             self._check_compatible_with(other, setitem=setitem)
diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py
index b05271552f117..f655d10881011 100644
--- a/pandas/core/arrays/datetimes.py
+++ b/pandas/core/arrays/datetimes.py
@@ -454,16 +454,13 @@ def _generate_range(
     # -----------------------------------------------------------------
     # DatetimeLike Interface

-    @classmethod
-    def _rebox_native(cls, value: int) -> np.datetime64:
-        return np.int64(value).view("M8[ns]")
-
-    def _unbox_scalar(self, value, setitem: bool = False):
+    def _unbox_scalar(self, value, setitem: bool = False) -> np.datetime64:
         if not isinstance(value, self._scalar_type) and value is not NaT:
             raise ValueError("'value' should be a Timestamp.")
         if not isna(value):
             self._check_compatible_with(value, setitem=setitem)
-        return value.value
+            return value.asm8
+        return np.datetime64(value.value, "ns")

     def _scalar_from_string(self, value):
         return Timestamp(value, tz=self.tz)
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index b95a7acc19b1f..d808ade53ad33 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -260,18 +260,14 @@ def _generate_range(cls, start, end, periods, freq, fields):
     # -----------------------------------------------------------------
     # DatetimeLike Interface

-    @classmethod
-    def _rebox_native(cls, value: int) -> np.int64:
-        return np.int64(value)
-
     def _unbox_scalar(
         self, value: Union[Period, NaTType], setitem: bool = False
     ) -> int:
         if value is NaT:
-            return value.value
+            return np.int64(value.value)
         elif isinstance(value, self._scalar_type):
             self._check_compatible_with(value, setitem=setitem)
-            return value.ordinal
+            return np.int64(value.ordinal)
         else:
             raise ValueError(f"'value' should be a Period. Got '{value}' instead.")

diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index e5b56ae80b578..e4a844fd4c6ef 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -301,15 +301,11 @@ def _generate_range(cls, start, end, periods, freq, closed=None):
     # ----------------------------------------------------------------
     # DatetimeLike Interface

-    @classmethod
-    def _rebox_native(cls, value: int) -> np.timedelta64:
-        return np.int64(value).view("m8[ns]")
-
-    def _unbox_scalar(self, value, setitem: bool = False):
+    def _unbox_scalar(self, value, setitem: bool = False) -> np.timedelta64:
         if not isinstance(value, self._scalar_type) and value is not NaT:
             raise ValueError("'value' should be a Timedelta.")
         self._check_compatible_with(value, setitem=setitem)
-        return value.value
+        return np.timedelta64(value.value, "ns")

     def _scalar_from_string(self, value):
         return Timedelta(value)
diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py
index b9298e9dec5b5..ec20c829f1544 100644
--- a/pandas/tests/arrays/test_datetimelike.py
+++ b/pandas/tests/arrays/test_datetimelike.py
@@ -191,10 +191,11 @@ def test_unbox_scalar(self):
         data = np.arange(10, dtype="i8") * 24 * 3600 * 10 ** 9
         arr = self.array_cls(data, freq="D")
         result = arr._unbox_scalar(arr[0])
-        assert isinstance(result, int)
+        expected = arr._data.dtype.type
+        assert isinstance(result, expected)

         result = arr._unbox_scalar(pd.NaT)
-        assert isinstance(result, int)
+        assert isinstance(result, expected)

         msg = f"'value' should be a {self.dtype.__name__}."
         with pytest.raises(ValueError, match=msg):

From 93e3477617531e1006eb98e87ddb7cbf1fb21797 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 3 Nov 2020 17:50:29 -0800
Subject: [PATCH 03/21] TST/REF: tests.generic (#37618)

---
 pandas/tests/frame/methods/test_equals.py              |  57 ++++++++-
 pandas/tests/frame/methods/test_head_tail.py           |  24 ++++
 .../generic/methods/test_first_valid_index.py          |   5 +-
 pandas/tests/generic/methods/test_pipe.py              |  15 +--
 .../generic/methods/test_reorder_levels.py             |  11 +-
 pandas/tests/generic/methods/test_sample.py            |  10 +-
 pandas/tests/generic/test_generic.py                   | 111 +++---------------
 7 files changed, 112 insertions(+), 121 deletions(-)

diff --git a/pandas/tests/frame/methods/test_equals.py b/pandas/tests/frame/methods/test_equals.py
index c024390297fec..de2509ed91be2 100644
--- a/pandas/tests/frame/methods/test_equals.py
+++ b/pandas/tests/frame/methods/test_equals.py
@@ -1,4 +1,6 @@
-from pandas import DataFrame
+import numpy as np
+
+from pandas import DataFrame, date_range
 import pandas._testing as tm


@@ -21,3 +23,56 @@ def test_equals_different_blocks(self):
         tm.assert_frame_equal(df0, df1)
         assert df0.equals(df1)
         assert df1.equals(df0)
+
+    def test_equals(self):
+        # Add object dtype column with nans
+        index = np.random.random(10)
+        df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
+        df1["text"] = "the sky is so blue. we could use more chocolate.".split()
+        df1["start"] = date_range("2000-1-1", periods=10, freq="T")
+        df1["end"] = date_range("2000-1-1", periods=10, freq="D")
+        df1["diff"] = df1["end"] - df1["start"]
+        df1["bool"] = np.arange(10) % 3 == 0
+        df1.loc[::2] = np.nan
+        df2 = df1.copy()
+        assert df1["text"].equals(df2["text"])
+        assert df1["start"].equals(df2["start"])
+        assert df1["end"].equals(df2["end"])
+        assert df1["diff"].equals(df2["diff"])
+        assert df1["bool"].equals(df2["bool"])
+        assert df1.equals(df2)
+        assert not df1.equals(object)
+
+        # different dtype
+        different = df1.copy()
+        different["floats"] = different["floats"].astype("float32")
+        assert not df1.equals(different)
+
+        # different index
+        different_index = -index
+        different = df2.set_index(different_index)
+        assert not df1.equals(different)
+
+        # different columns
+        different = df2.copy()
+        different.columns = df2.columns[::-1]
+        assert not df1.equals(different)
+
+        # DatetimeIndex
+        index = date_range("2000-1-1", periods=10, freq="T")
+        df1 = df1.set_index(index)
+        df2 = df1.copy()
+        assert df1.equals(df2)
+
+        # MultiIndex
+        df3 = df1.set_index(["text"], append=True)
+        df2 = df1.set_index(["text"], append=True)
+        assert df3.equals(df2)
+
+        df2 = df1.set_index(["floats"], append=True)
+        assert not df3.equals(df2)
+
+        # NaN in index
+        df3 = df1.set_index(["floats"], append=True)
+        df2 = df1.set_index(["floats"], append=True)
+        assert df3.equals(df2)
diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py
index 93763bc12ce0d..fa28f7d3e16a2 100644
--- a/pandas/tests/frame/methods/test_head_tail.py
+++ b/pandas/tests/frame/methods/test_head_tail.py
@@ -4,6 +4,30 @@
 import pandas._testing as tm


+def test_head_tail_generic(index, frame_or_series):
+    # GH#5370
+
+    ndim = 2 if frame_or_series is DataFrame else 1
+    shape = (len(index),) * ndim
+    vals = np.random.randn(*shape)
+    obj = frame_or_series(vals, index=index)
+
+    tm.assert_equal(obj.head(), obj.iloc[:5])
+    tm.assert_equal(obj.tail(), obj.iloc[-5:])
+
+    # 0-len
+    tm.assert_equal(obj.head(0), obj.iloc[0:0])
+    tm.assert_equal(obj.tail(0), obj.iloc[0:0])
+
+    # bounded
+    tm.assert_equal(obj.head(len(obj) + 1), obj)
+    tm.assert_equal(obj.tail(len(obj) + 1), obj)
+
+    # neg index
+    tm.assert_equal(obj.head(-3), obj.head(len(index) - 3))
+    tm.assert_equal(obj.tail(-3), obj.tail(len(index) - 3))
+
+
 def test_head_tail(float_frame):
     tm.assert_frame_equal(float_frame.head(), float_frame[:5])
     tm.assert_frame_equal(float_frame.tail(), float_frame[-5:])
diff --git a/pandas/tests/generic/methods/test_first_valid_index.py b/pandas/tests/generic/methods/test_first_valid_index.py
index bca3452c3c458..8d021f0e3954e 100644
--- a/pandas/tests/generic/methods/test_first_valid_index.py
+++ b/pandas/tests/generic/methods/test_first_valid_index.py
@@ -9,10 +9,9 @@ class TestFirstValidIndex:
-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_first_valid_index_single_nan(self, klass):
+    def test_first_valid_index_single_nan(self, frame_or_series):
         # GH#9752 Series/DataFrame should both return None, not raise
-        obj = klass([np.nan])
+        obj = frame_or_series([np.nan])
         assert obj.first_valid_index() is None
         assert obj.iloc[:0].first_valid_index() is None
diff --git a/pandas/tests/generic/methods/test_pipe.py b/pandas/tests/generic/methods/test_pipe.py
index 59e5edc4b8bb5..b378600634bf0 100644
--- a/pandas/tests/generic/methods/test_pipe.py
+++ b/pandas/tests/generic/methods/test_pipe.py
@@ -5,11 +5,10 @@


 class TestPipe:
-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_pipe(self, klass):
+    def test_pipe(self, frame_or_series):
         obj = DataFrame({"A": [1, 2, 3]})
         expected = DataFrame({"A": [1, 4, 9]})
-        if klass is Series:
+        if frame_or_series is Series:
             obj = obj["A"]
             expected = expected["A"]

@@ -17,20 +16,18 @@ def test_pipe(self, klass):
         result = obj.pipe(f, 2)
         tm.assert_equal(result, expected)

-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_pipe_tuple(self, klass):
+    def test_pipe_tuple(self, frame_or_series):
         obj = DataFrame({"A": [1, 2, 3]})
-        if klass is Series:
+        if frame_or_series is Series:
             obj = obj["A"]

         f = lambda x, y: y
         result = obj.pipe((f, "y"), 0)
         tm.assert_equal(result, obj)

-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_pipe_tuple_error(self, klass):
+    def test_pipe_tuple_error(self, frame_or_series):
         obj = DataFrame({"A": [1, 2, 3]})
-        if klass is Series:
+        if frame_or_series is Series:
             obj = obj["A"]

         f = lambda x, y: y
diff --git a/pandas/tests/generic/methods/test_reorder_levels.py b/pandas/tests/generic/methods/test_reorder_levels.py
index 8bb6417e56659..6bfbf089a6108 100644
--- a/pandas/tests/generic/methods/test_reorder_levels.py
+++ b/pandas/tests/generic/methods/test_reorder_levels.py
@@ -1,20 +1,19 @@
 import numpy as np
 import pytest

-from pandas import DataFrame, MultiIndex, Series
+from pandas import DataFrame, MultiIndex
 import pandas._testing as tm


 class TestReorderLevels:
-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_reorder_levels(self, klass):
+    def test_reorder_levels(self, frame_or_series):
         index = MultiIndex(
             levels=[["bar"], ["one", "two", "three"], [0, 1]],
             codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
             names=["L0", "L1", "L2"],
         )
         df = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=index)
-        obj = df if klass is DataFrame else df["A"]
+        obj = df if frame_or_series is DataFrame else df["A"]

         # no change, position
         result = obj.reorder_levels([0, 1, 2])
@@ -32,7 +31,7 @@ def test_reorder_levels(self, klass):
             names=["L1", "L2", "L0"],
         )
         expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx)
-        expected = expected if klass is DataFrame else expected["A"]
+        expected = expected if frame_or_series is DataFrame else expected["A"]
         tm.assert_equal(result, expected)

         result = obj.reorder_levels([0, 0, 0])
@@ -42,7 +41,7 @@ def test_reorder_levels(self, klass):
             names=["L0", "L0", "L0"],
         )
         expected = DataFrame({"A": np.arange(6), "B": np.arange(6)}, index=e_idx)
-        expected = expected if klass is DataFrame else expected["A"]
+        expected = expected if frame_or_series is DataFrame else expected["A"]
         tm.assert_equal(result, expected)

         result = obj.reorder_levels(["L0", "L0", "L0"])
diff --git a/pandas/tests/generic/methods/test_sample.py b/pandas/tests/generic/methods/test_sample.py
index 7303dad9170ed..b26a3785f918d 100644
--- a/pandas/tests/generic/methods/test_sample.py
+++ b/pandas/tests/generic/methods/test_sample.py
@@ -155,22 +155,20 @@ def test_sample_none_weights(self, obj):
             ),
         ],
     )
-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_sample_random_state(self, func_str, arg, klass):
+    def test_sample_random_state(self, func_str, arg, frame_or_series):
         # GH#32503
         obj = DataFrame({"col1": range(10, 20), "col2": range(20, 30)})
-        if klass is Series:
+        if frame_or_series is Series:
             obj = obj["col1"]
         result = obj.sample(n=3, random_state=eval(func_str)(arg))
         expected = obj.sample(n=3, random_state=com.random_state(eval(func_str)(arg)))
         tm.assert_equal(result, expected)

-    @pytest.mark.parametrize("klass", [Series, DataFrame])
-    def test_sample_upsampling_without_replacement(self, klass):
+    def test_sample_upsampling_without_replacement(self, frame_or_series):
         # GH#27451
         obj = DataFrame({"A": list("abc")})
-        if klass is Series:
+        if frame_or_series is Series:
             obj = obj["A"]

         msg = (
diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py
index 45601abc95fe6..930c48cbdc214 100644
--- a/pandas/tests/generic/test_generic.py
+++ b/pandas/tests/generic/test_generic.py
@@ -5,8 +5,7 @@

 from pandas.core.dtypes.common import is_scalar

-import pandas as pd
-from pandas import DataFrame, Series, date_range
+from pandas import DataFrame, Series
 import pandas._testing as tm

 # ----------------------------------------------------------------------
@@ -248,31 +247,6 @@ def test_metadata_propagation(self):
         self.check_metadata(v1 & v2)
         self.check_metadata(v1 | v2)

-    def test_head_tail(self, index):
-        # GH5370
-
-        o = self._construct(shape=len(index))
-
-        axis = o._get_axis_name(0)
-        setattr(o, axis, index)
-
-        o.head()
-
-        self._compare(o.head(), o.iloc[:5])
-        self._compare(o.tail(), o.iloc[-5:])
-
-        # 0-len
-        self._compare(o.head(0), o.iloc[0:0])
-        self._compare(o.tail(0), o.iloc[0:0])
-
-        # bounded
-        self._compare(o.head(len(o) + 1), o)
-        self._compare(o.tail(len(o) + 1), o)
-
-        # neg index
-        self._compare(o.head(-3), o.head(len(index) - 3))
-        self._compare(o.tail(-3), o.tail(len(index) - 3))
-
     def test_size_compat(self):
         # GH8846
         # size property should be defined
@@ -460,77 +434,23 @@ def test_take_invalid_kwargs(self):
             obj.take(indices, mode="clip")

     @pytest.mark.parametrize("is_copy", [True, False])
-    def test_depr_take_kwarg_is_copy(self, is_copy):
+    def test_depr_take_kwarg_is_copy(self, is_copy, frame_or_series):
         # GH 27357
-        df = DataFrame({"A": [1, 2, 3]})
+        obj = DataFrame({"A": [1, 2, 3]})
+        if frame_or_series is Series:
+            obj = obj["A"]
+
         msg = (
             "is_copy is deprecated and will be removed in a future version. "
             "'take' always returns a copy, so there is no need to specify this."
         )
         with tm.assert_produces_warning(FutureWarning) as w:
-            df.take([0, 1], is_copy=is_copy)
+            obj.take([0, 1], is_copy=is_copy)

         assert w[0].message.args[0] == msg

-        s = Series([1, 2, 3])
-        with tm.assert_produces_warning(FutureWarning):
-            s.take([0, 1], is_copy=is_copy)
-
-    def test_equals(self):
-        # Add object dtype column with nans
-        index = np.random.random(10)
-        df1 = DataFrame(np.random.random(10), index=index, columns=["floats"])
-        df1["text"] = "the sky is so blue. we could use more chocolate.".split()
-        df1["start"] = date_range("2000-1-1", periods=10, freq="T")
-        df1["end"] = date_range("2000-1-1", periods=10, freq="D")
-        df1["diff"] = df1["end"] - df1["start"]
-        df1["bool"] = np.arange(10) % 3 == 0
-        df1.loc[::2] = np.nan
-        df2 = df1.copy()
-        assert df1["text"].equals(df2["text"])
-        assert df1["start"].equals(df2["start"])
-        assert df1["end"].equals(df2["end"])
-        assert df1["diff"].equals(df2["diff"])
-        assert df1["bool"].equals(df2["bool"])
-        assert df1.equals(df2)
-        assert not df1.equals(object)
-
-        # different dtype
-        different = df1.copy()
-        different["floats"] = different["floats"].astype("float32")
-        assert not df1.equals(different)
-
-        # different index
-        different_index = -index
-        different = df2.set_index(different_index)
-        assert not df1.equals(different)
-
-        # different columns
-        different = df2.copy()
-        different.columns = df2.columns[::-1]
-        assert not df1.equals(different)
-
-        # DatetimeIndex
-        index = pd.date_range("2000-1-1", periods=10, freq="T")
-        df1 = df1.set_index(index)
-        df2 = df1.copy()
-        assert df1.equals(df2)
-
-        # MultiIndex
-        df3 = df1.set_index(["text"], append=True)
-        df2 = df1.set_index(["text"], append=True)
-        assert df3.equals(df2)
-
-        df2 = df1.set_index(["floats"], append=True)
-        assert not df3.equals(df2)
-
-        # NaN in index
-        df3 = df1.set_index(["floats"], append=True)
-        df2 = df1.set_index(["floats"], append=True)
-        assert df3.equals(df2)
-
-    @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
-    def test_axis_classmethods(self, box):
+    def test_axis_classmethods(self, frame_or_series):
+        box = frame_or_series
         obj = box(dtype=object)
         values = box._AXIS_TO_AXIS_NUMBER.keys()
         for v in values:
@@ -538,24 +458,23 @@ def test_axis_classmethods(self, box):
         assert obj._get_axis_name(v) == box._get_axis_name(v)
         assert obj._get_block_manager_axis(v) == box._get_block_manager_axis(v)

-    @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
-    def test_axis_names_deprecated(self, box):
+    def test_axis_names_deprecated(self, frame_or_series):
         # GH33637
+        box = frame_or_series
         obj = box(dtype=object)
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             obj._AXIS_NAMES

-    @pytest.mark.parametrize("box", [pd.Series, pd.DataFrame])
-    def test_axis_numbers_deprecated(self, box):
+    def test_axis_numbers_deprecated(self, frame_or_series):
         # GH33637
+        box = frame_or_series
         obj = box(dtype=object)
         with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
             obj._AXIS_NUMBERS

-    @pytest.mark.parametrize("as_frame", [True, False])
-    def test_flags_identity(self, as_frame):
+    def test_flags_identity(self, frame_or_series):
         s = Series([1, 2])
-        if as_frame:
+        if frame_or_series is DataFrame:
             s = s.to_frame()

         assert s.flags is s.flags

From e0d1c7e1bd4beca7a0389115f1b6d681bb2fad48 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 3 Nov 2020 17:51:33 -0800
Subject: [PATCH 04/21] TST: collect tests by method (#37617)

* TST/REF: collect test_timeseries tests by method

* misplaced DataFrame.values tst

* misplaced dataframe.values test

* collect test by method
---
 pandas/tests/frame/methods/test_asfreq.py       | 11 ++++
 pandas/tests/frame/methods/test_values.py       | 19 ++++++-
 .../tests/indexes/datetimes/test_indexing.py    |  7 +++
 .../tests/series/apply/test_series_apply.py     | 13 ++++-
 pandas/tests/series/methods/test_values.py      | 20 +++++++
 pandas/tests/series/test_arithmetic.py          | 15 ++++++
 pandas/tests/series/test_dtypes.py              | 52 ++++---------
 pandas/tests/series/test_period.py              | 24 --------
 pandas/tests/series/test_timeseries.py          | 41 ---------------
 9 files changed, 93 insertions(+), 109 deletions(-)
 create mode 100644 pandas/tests/series/methods/test_values.py
 delete mode 100644 pandas/tests/series/test_period.py
 delete mode 100644 pandas/tests/series/test_timeseries.py

diff --git a/pandas/tests/frame/methods/test_asfreq.py b/pandas/tests/frame/methods/test_asfreq.py
index cdcd922949bcf..368ce88abe165 100644
--- a/pandas/tests/frame/methods/test_asfreq.py
+++ b/pandas/tests/frame/methods/test_asfreq.py
@@ -74,3 +74,14 @@ def test_asfreq_fillvalue(self):
         expected_series = ts.asfreq(freq="1S").fillna(9.0)
         actual_series = ts.asfreq(freq="1S", fill_value=9.0)
         tm.assert_series_equal(expected_series, actual_series)
+
+    def test_asfreq_with_date_object_index(self, frame_or_series):
+        rng = date_range("1/1/2000", periods=20)
+        ts = frame_or_series(np.random.randn(20), index=rng)
+
+        ts2 = ts.copy()
+        ts2.index = [x.date() for x in ts2.index]
+
+        result = ts2.asfreq("4H", method="ffill")
+        expected = ts.asfreq("4H", method="ffill")
+        tm.assert_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_values.py b/pandas/tests/frame/methods/test_values.py
index 564a659724768..fb0c5d31f692b 100644
--- a/pandas/tests/frame/methods/test_values.py
+++ b/pandas/tests/frame/methods/test_values.py
@@ -1,6 +1,7 @@
 import numpy as np
+import pytest

-from pandas import DataFrame, NaT, Timestamp, date_range
+from pandas import DataFrame, NaT, Series, Timestamp, date_range, period_range
 import pandas._testing as tm


@@ -44,6 +45,22 @@ def test_values_duplicates(self):

         tm.assert_numpy_array_equal(result, expected)

+    @pytest.mark.parametrize("constructor", [date_range, period_range])
+    def test_values_casts_datetimelike_to_object(self, constructor):
+        series = Series(constructor("2000-01-01", periods=10, freq="D"))
+
+        expected = series.astype("object")
+
+        df = DataFrame({"a": series, "b": np.random.randn(len(series))})
+
+        result = df.values.squeeze()
+        assert (result[:, 0] == expected.values).all()
+
+        df = DataFrame({"a": series, "b": ["foo"] * len(series)})
+
+        result = df.values.squeeze()
+        assert (result[:, 0] == expected.values).all()
+
     def test_frame_values_with_tz(self):
         tz = "US/Central"
         df = DataFrame({"A": date_range("2000", periods=4, tz=tz)})
diff --git a/pandas/tests/indexes/datetimes/test_indexing.py b/pandas/tests/indexes/datetimes/test_indexing.py
index d4ebb557fd6cd..59269b9b54ddc 100644
--- a/pandas/tests/indexes/datetimes/test_indexing.py
+++ b/pandas/tests/indexes/datetimes/test_indexing.py
@@ -544,6 +544,13 @@ def test_contains_nonunique(self, vals):


 class TestGetIndexer:
+    def test_get_indexer_date_objs(self):
+        rng = date_range("1/1/2000", periods=20)
+
+        result = rng.get_indexer(rng.map(lambda x: x.date()))
+        expected = rng.get_indexer(rng)
+        tm.assert_numpy_array_equal(result, expected)
+
     def test_get_indexer(self):
         idx = pd.date_range("2000-01-01", periods=3)
         exp = np.array([0, 1, 2], dtype=np.intp)
diff --git a/pandas/tests/series/apply/test_series_apply.py b/pandas/tests/series/apply/test_series_apply.py
index 9096d2a1033e5..93431a5c75091 100644
--- a/pandas/tests/series/apply/test_series_apply.py
+++ b/pandas/tests/series/apply/test_series_apply.py
@@ -5,12 +5,23 @@
 import pytest

 import pandas as pd
-from pandas import DataFrame, Index, MultiIndex, Series, isna
+from pandas import DataFrame, Index, MultiIndex, Series, isna, timedelta_range
 import pandas._testing as tm
 from pandas.core.base import SpecificationError


 class TestSeriesApply:
+    def test_series_map_box_timedelta(self):
+        # GH#11349
+        ser = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
+
+        def f(x):
+            return x.total_seconds()
+
+        ser.map(f)
+        ser.apply(f)
+        DataFrame(ser).applymap(f)
+
     def test_apply(self, datetime_series):
         with np.errstate(all="ignore"):
             tm.assert_series_equal(
diff --git a/pandas/tests/series/methods/test_values.py b/pandas/tests/series/methods/test_values.py
new file mode 100644
index 0000000000000..e28a714ea656d
--- /dev/null
+++ b/pandas/tests/series/methods/test_values.py
@@ -0,0 +1,20 @@
+import numpy as np
+import pytest
+
+from pandas import IntervalIndex, Series, period_range
+import pandas._testing as tm
+
+
+class TestValues:
+    @pytest.mark.parametrize(
+        "data",
+        [
+            period_range("2000", periods=4),
+            IntervalIndex.from_breaks([1, 2, 3, 4]),
+        ],
+    )
+    def test_values_object_extension_dtypes(self, data):
+        # https://github.com/pandas-dev/pandas/issues/23995
+        result = Series(data).values
+        expected = np.array(data.astype(object))
+        tm.assert_numpy_array_equal(result, expected)
diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py
index 9154c566a3dae..fa8f85178ba9f 100644
--- a/pandas/tests/series/test_arithmetic.py
+++ b/pandas/tests/series/test_arithmetic.py
@@ -730,6 +730,21 @@ def test_datetime_understood(self):
         expected = Series(pd.to_datetime(["2011-12-26", "2011-12-27", "2011-12-28"]))
         tm.assert_series_equal(result, expected)

+    def test_align_date_objects_with_datetimeindex(self):
+        rng = date_range("1/1/2000", periods=20)
+        ts = Series(np.random.randn(20), index=rng)
+
+        ts_slice = ts[5:]
+        ts2 = ts_slice.copy()
+        ts2.index = [x.date() for x in ts2.index]
+
+        result = ts + ts2
+        result2 = ts2 + ts
+        expected = ts + ts[5:]
+        expected.index = expected.index._with_freq(None)
+        tm.assert_series_equal(result, expected)
+        tm.assert_series_equal(result2, expected)
+

 @pytest.mark.parametrize(
     "names",
diff --git a/pandas/tests/series/test_dtypes.py b/pandas/tests/series/test_dtypes.py
index b85a53960b0f6..2fbed92567f71 100644
--- a/pandas/tests/series/test_dtypes.py
+++ b/pandas/tests/series/test_dtypes.py
@@ -6,7 +6,7 @@
 from pandas.core.dtypes.dtypes import CategoricalDtype

 import pandas as pd
-from pandas import Categorical, DataFrame, Series, date_range
+from pandas import Categorical, DataFrame, Series
 import pandas._testing as tm
@@ -120,18 +120,20 @@ def cmp(a, b):
             s.astype("object").astype(CategoricalDtype()), roundtrip_expected
         )

+    def test_invalid_conversions(self):
         # invalid conversion (these are NOT a dtype)
+        cat = Categorical([f"{i} - {i + 499}" for i in range(0, 10000, 500)])
+        ser = Series(np.random.randint(0, 10000, 100)).sort_values()
+        ser = pd.cut(ser, range(0, 10500, 500), right=False, labels=cat)
+
         msg = (
             "dtype '<class 'pandas.core.arrays.categorical.Categorical'>' "
             "not understood"
         )
-
-        for invalid in [
-            lambda x: x.astype(Categorical),
-            lambda x: x.astype("object").astype(Categorical),
-        ]:
-            with pytest.raises(TypeError, match=msg):
-                invalid(s)
+        with pytest.raises(TypeError, match=msg):
+            ser.astype(Categorical)
+        with pytest.raises(TypeError, match=msg):
+            ser.astype("object").astype(Categorical)

     @pytest.mark.parametrize("dtype", np.typecodes["All"])
     def test_astype_empty_constructor_equality(self, dtype):
@@ -148,27 +150,6 @@ def test_astype_empty_constructor_equality(self, dtype):
         as_type_empty = Series([]).astype(dtype)
         tm.assert_series_equal(init_empty, as_type_empty)

-    def test_intercept_astype_object(self):
-        series = Series(date_range("1/1/2000", periods=10))
-
-        # This test no longer makes sense, as
-        # Series is by default already M8[ns].
-        expected = series.astype("object")
-
-        df = DataFrame({"a": series, "b": np.random.randn(len(series))})
-        exp_dtypes = Series(
-            [np.dtype("datetime64[ns]"), np.dtype("float64")], index=["a", "b"]
-        )
-        tm.assert_series_equal(df.dtypes, exp_dtypes)
-
-        result = df.values.squeeze()
-        assert (result[:, 0] == expected.values).all()
-
-        df = DataFrame({"a": series, "b": ["foo"] * len(series)})
-
-        result = df.values.squeeze()
-        assert (result[:, 0] == expected.values).all()
-
     def test_series_to_categorical(self):
         # see gh-16524: test conversion of Series to Categorical
         series = Series(["a", "b", "c"])
@@ -178,19 +159,6 @@ def test_series_to_categorical(self):

         tm.assert_series_equal(result, expected)

-    @pytest.mark.parametrize(
-        "data",
-        [
-            pd.period_range("2000", periods=4),
-            pd.IntervalIndex.from_breaks([1, 2, 3, 4]),
-        ],
-    )
-    def test_values_compatibility(self, data):
-        # https://github.com/pandas-dev/pandas/issues/23995
-        result = Series(data).values
-        expected = np.array(data.astype(object))
-        tm.assert_numpy_array_equal(result, expected)
-
     def test_reindex_astype_order_consistency(self):
         # GH 17444
         s = Series([1, 2, 3], index=[2, 0, 1])
diff --git a/pandas/tests/series/test_period.py b/pandas/tests/series/test_period.py
deleted file mode 100644
index 17dbfa9cf379a..0000000000000
--- a/pandas/tests/series/test_period.py
+++ /dev/null
@@ -1,24 +0,0 @@
-import numpy as np
-
-from pandas import DataFrame, Series, period_range
-
-
-class TestSeriesPeriod:
-
-    # ---------------------------------------------------------------------
-    # NaT support
-
-    def test_intercept_astype_object(self):
-        series = Series(period_range("2000-01-01", periods=10, freq="D"))
-
-        expected = series.astype("object")
-
-        df = DataFrame({"a": series, "b": np.random.randn(len(series))})
-
-        result = df.values.squeeze()
-        assert (result[:, 0] == expected.values).all()
-
-        df = DataFrame({"a": series, "b": ["foo"] * len(series)})
-
-        result = df.values.squeeze()
-        assert (result[:, 0] == expected.values).all()
diff --git a/pandas/tests/series/test_timeseries.py b/pandas/tests/series/test_timeseries.py
deleted file mode 100644
index 0769606d18d57..0000000000000
--- a/pandas/tests/series/test_timeseries.py
+++ /dev/null
@@ -1,41 +0,0 @@
-import numpy as np
-
-from pandas import DataFrame, Series, date_range, timedelta_range
-import pandas._testing as tm
-
-
-class TestTimeSeries:
-    def test_promote_datetime_date(self):
-        rng = date_range("1/1/2000", periods=20)
-        ts = Series(np.random.randn(20), index=rng)
-
-        ts_slice = ts[5:]
-        ts2 = ts_slice.copy()
-        ts2.index = [x.date() for x in ts2.index]
-
-        result = ts + ts2
-        result2 = ts2 + ts
-        expected = ts + ts[5:]
-        expected.index = expected.index._with_freq(None)
-        tm.assert_series_equal(result, expected)
-        tm.assert_series_equal(result2, expected)
-
-        # test asfreq
-        result = ts2.asfreq("4H", method="ffill")
-        expected = ts[5:].asfreq("4H", method="ffill")
-        tm.assert_series_equal(result, expected)
-
-        result = rng.get_indexer(ts2.index)
-        expected = rng.get_indexer(ts_slice.index)
-        tm.assert_numpy_array_equal(result, expected)
-
-    def test_series_map_box_timedelta(self):
-        # GH 11349
-        s = Series(timedelta_range("1 day 1 s", periods=5, freq="h"))
-
-        def f(x):
-            return x.total_seconds()
-
-        s.map(f)
-        s.apply(f)
-        DataFrame(s).applymap(f)

From d75eb5ba1be16b6cd74fd44a68ce124be6575e4f Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Tue, 3 Nov 2020 17:53:16 -0800
Subject: [PATCH 05/21] TST/REF: share tests across Series/DataFrame (#37616)

---
 pandas/tests/frame/methods/test_asof.py            |  10 +-
 pandas/tests/frame/methods/test_droplevel.py       |  29 +++--
 .../frame/methods/test_first_and_last.py           |  44 +++++---
 pandas/tests/frame/methods/test_head_tail.py       |   3 +
 pandas/tests/frame/methods/test_truncate.py        |  69 ++++++++----
 pandas/tests/frame/methods/test_tz_convert.py      |   9 +-
 .../tests/frame/methods/test_tz_localize.py        |   9 +-
 pandas/tests/series/methods/test_asof.py           |   3 -
 pandas/tests/series/methods/test_droplevel.py      |  19 ----
 .../series/methods/test_first_and_last.py          |  69 ------------
 pandas/tests/series/{indexing => methods}/test_pop.py | 0
 pandas/tests/series/methods/test_truncate.py       | 106 ------------------
 12 files changed, 111 insertions(+), 259 deletions(-)
 delete mode 100644 pandas/tests/series/methods/test_droplevel.py
 delete mode 100644 pandas/tests/series/methods/test_first_and_last.py
 rename pandas/tests/series/{indexing => methods}/test_pop.py (100%)

diff --git a/pandas/tests/frame/methods/test_asof.py b/pandas/tests/frame/methods/test_asof.py
index 70b42976c95a7..6931dd0ea2d4c 100644
--- a/pandas/tests/frame/methods/test_asof.py
+++ b/pandas/tests/frame/methods/test_asof.py
@@ -96,12 +96,16 @@ def test_missing(self, date_range_frame):
         result = df.asof("1989-12-31")
         assert isinstance(result.name, Period)

+    def test_asof_all_nans(self, frame_or_series):
+        # GH 15713
+        # DataFrame/Series is all nans
+        result = frame_or_series([np.nan]).asof([0])
+        expected = frame_or_series([np.nan])
+        tm.assert_equal(result, expected)
+
     def test_all_nans(self, date_range_frame):
         # GH 15713
         # DataFrame is all nans
-        result = DataFrame([np.nan]).asof([0])
-        expected = DataFrame([np.nan])
-        tm.assert_frame_equal(result, expected)

         # testing non-default indexes, multiple inputs
         N = 150
diff --git a/pandas/tests/frame/methods/test_droplevel.py b/pandas/tests/frame/methods/test_droplevel.py
index 517905cf23259..ce98704b03106 100644
--- a/pandas/tests/frame/methods/test_droplevel.py
+++ b/pandas/tests/frame/methods/test_droplevel.py
@@ -1,23 +1,32 @@
+import pytest
+
 from pandas import DataFrame, Index, MultiIndex
 import pandas._testing as tm


 class TestDropLevel:
-    def test_droplevel(self):
+    def test_droplevel(self, frame_or_series):
         # GH#20342
-        df = DataFrame([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])
-        df = df.set_index([0, 1]).rename_axis(["a", "b"])
-        df.columns = MultiIndex.from_tuples(
+        cols = MultiIndex.from_tuples(
             [("c", "e"), ("d", "f")], names=["level_1", "level_2"]
         )
+        mi = MultiIndex.from_tuples([(1, 2), (5, 6), (9, 10)], names=["a", "b"])
+        df = DataFrame([[3, 4], [7, 8], [11, 12]], index=mi, columns=cols)
+        if frame_or_series is not DataFrame:
+            df = df.iloc[:, 0]

         # test that dropping of a level in index works
         expected = df.reset_index("a", drop=True)
         result = df.droplevel("a", axis="index")
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

-        # test that dropping of a level in columns works
-        expected = df.copy()
-        expected.columns = Index(["c", "d"], name="level_1")
-        result = df.droplevel("level_2", axis="columns")
-        tm.assert_frame_equal(result, expected)
+        if frame_or_series is DataFrame:
+            # test that dropping of a level in columns works
+            expected = df.copy()
+            expected.columns = Index(["c", "d"], name="level_1")
+            result = df.droplevel("level_2", axis="columns")
+            tm.assert_equal(result, expected)
+        else:
+            # test that droplevel raises ValueError on axis != 0
+            with pytest.raises(ValueError, match="No axis named columns"):
+                df.droplevel(1, axis="columns")
diff --git a/pandas/tests/frame/methods/test_first_and_last.py b/pandas/tests/frame/methods/test_first_and_last.py
index 2b3756969acca..d21e1eee54e16 100644
--- a/pandas/tests/frame/methods/test_first_and_last.py
+++ b/pandas/tests/frame/methods/test_first_and_last.py
@@ -8,56 +8,64 @@


 class TestFirst:
-    def test_first_subset(self):
+    def test_first_subset(self, frame_or_series):
         ts = tm.makeTimeDataFrame(freq="12h")
+        if frame_or_series is not DataFrame:
+            ts = ts["A"]
         result = ts.first("10d")
         assert len(result) == 20

         ts = tm.makeTimeDataFrame(freq="D")
+        if frame_or_series is not DataFrame:
+            ts = ts["A"]
         result = ts.first("10d")
         assert len(result) == 10

         result = ts.first("3M")
         expected = ts[:"3/31/2000"]
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

         result = ts.first("21D")
         expected = ts[:21]
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

         result = ts[:0].first("3M")
-        tm.assert_frame_equal(result, ts[:0])
+        tm.assert_equal(result, ts[:0])

-    def test_first_raises(self):
+    def test_first_last_raises(self, frame_or_series):
         # GH#20725
-        df = DataFrame([[1, 2, 3], [4, 5, 6]])
+        obj = DataFrame([[1, 2, 3], [4, 5, 6]])
+        if frame_or_series is not DataFrame:
+            obj = obj[0]
+
         msg = "'first' only supports a DatetimeIndex index"
         with pytest.raises(TypeError, match=msg):  # index is not a DatetimeIndex
-            df.first("1D")
+            obj.first("1D")
+
+        msg = "'last' only supports a DatetimeIndex index"
+        with pytest.raises(TypeError, match=msg):  # index is not a DatetimeIndex
+            obj.last("1D")

-    def test_last_subset(self):
+    def test_last_subset(self, frame_or_series):
         ts = tm.makeTimeDataFrame(freq="12h")
+        if frame_or_series is not DataFrame:
+            ts = ts["A"]
         result = ts.last("10d")
         assert len(result) == 20

         ts = tm.makeTimeDataFrame(nper=30, freq="D")
+        if frame_or_series is not DataFrame:
+            ts = ts["A"]
         result = ts.last("10d")
         assert len(result) == 10

         result = ts.last("21D")
         expected = ts["2000-01-10":]
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

         result = ts.last("21D")
         expected = ts[-21:]
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

         result = ts[:0].last("3M")
-        tm.assert_frame_equal(result, ts[:0])
-
-    def test_last_raises(self):
-        # GH20725
-        df = DataFrame([[1, 2, 3], [4, 5, 6]])
-        msg = "'last' only supports a DatetimeIndex index"
-        with pytest.raises(TypeError, match=msg):  # index is not a DatetimeIndex
-            df.last("1D")
+        tm.assert_equal(result, ts[:0])
diff --git a/pandas/tests/frame/methods/test_head_tail.py b/pandas/tests/frame/methods/test_head_tail.py
index fa28f7d3e16a2..99cb7840c3eb6 100644
--- a/pandas/tests/frame/methods/test_head_tail.py
+++ b/pandas/tests/frame/methods/test_head_tail.py
@@ -48,6 +48,9 @@ def test_head_tail(float_frame):
     tm.assert_frame_equal(df.tail(0), df[0:0])
     tm.assert_frame_equal(df.head(-1), df.iloc[:-1])
     tm.assert_frame_equal(df.tail(-1), df.iloc[1:])
+
+
+def test_head_tail_empty():
     # test empty dataframe
     empty_df = DataFrame()
     tm.assert_frame_equal(empty_df.tail(), empty_df)
diff --git a/pandas/tests/frame/methods/test_truncate.py b/pandas/tests/frame/methods/test_truncate.py
index 674f482c478a0..c6d6637edc88c 100644
--- a/pandas/tests/frame/methods/test_truncate.py
+++ b/pandas/tests/frame/methods/test_truncate.py
@@ -2,12 +2,15 @@
 import pytest

 import pandas as pd
+from pandas import DataFrame, Series, date_range
 import pandas._testing as tm


 class TestDataFrameTruncate:
-    def test_truncate(self, datetime_frame):
+    def test_truncate(self, datetime_frame, frame_or_series):
         ts = datetime_frame[::3]
+        if frame_or_series is Series:
+            ts = ts.iloc[:, 0]

         start, end = datetime_frame.index[3], datetime_frame.index[6]
@@ -16,34 +19,41 @@ def test_truncate(self, datetime_frame):

         # neither specified
         truncated = ts.truncate()
-        tm.assert_frame_equal(truncated, ts)
+        tm.assert_equal(truncated, ts)

         # both specified
         expected = ts[1:3]

         truncated = ts.truncate(start, end)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)

         truncated = ts.truncate(start_missing, end_missing)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)

         # start specified
         expected = ts[1:]

         truncated = ts.truncate(before=start)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)

         truncated = ts.truncate(before=start_missing)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)

         # end specified
         expected = ts[:3]

         truncated = ts.truncate(after=end)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)

         truncated = ts.truncate(after=end_missing)
-        tm.assert_frame_equal(truncated, expected)
+        tm.assert_equal(truncated, expected)
+
+        # corner case, empty series/frame returned
+        truncated = ts.truncate(after=ts.index[0] - ts.index.freq)
+        assert len(truncated) == 0
+
+        truncated = ts.truncate(before=ts.index[-1] + ts.index.freq)
+        assert len(truncated) == 0

         msg = "Truncate: 2000-01-06 00:00:00 must be after 2000-02-04 00:00:00"
         with pytest.raises(ValueError, match=msg):
@@ -57,25 +67,35 @@ def test_truncate_copy(self, datetime_frame):
         truncated.values[:] = 5.0
         assert not (datetime_frame.values[5:11] == 5).any()

-    def test_truncate_nonsortedindex(self):
+    def test_truncate_nonsortedindex(self, frame_or_series):
         # GH#17935
-        df = pd.DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0])
+        obj = DataFrame({"A": ["a", "b", "c", "d", "e"]}, index=[5, 3, 2, 9, 0])
+        if frame_or_series is Series:
+            obj = obj["A"]
+
         msg = "truncate requires a sorted index"
         with pytest.raises(ValueError, match=msg):
-            df.truncate(before=3, after=9)
+            obj.truncate(before=3, after=9)
+
+    def test_sort_values_nonsortedindex(self):
+        # TODO: belongs elsewhere?

-        rng = pd.date_range("2011-01-01", "2012-01-01", freq="W")
-        ts = pd.DataFrame(
+        rng = date_range("2011-01-01", "2012-01-01", freq="W")
+        ts = DataFrame(
             {"A": np.random.randn(len(rng)), "B": np.random.randn(len(rng))}, index=rng
         )
+
         msg = "truncate requires a sorted index"
         with pytest.raises(ValueError, match=msg):
             ts.sort_values("A", ascending=False).truncate(
                 before="2011-11", after="2011-12"
             )

-        df = pd.DataFrame(
+    def test_truncate_nonsortedindex_axis1(self):
+        # GH#17935
+
+        df = DataFrame(
             {
                 3: np.random.randn(5),
                 20: np.random.randn(5),
@@ -93,27 +113,34 @@
         [(1, 2, [2, 1]), (None, 2, [2, 1, 0]), (1, None, [3, 2, 1])],
     )
     @pytest.mark.parametrize("klass", [pd.Int64Index, pd.DatetimeIndex])
-    def test_truncate_decreasing_index(self, before, after, indices, klass):
+    def test_truncate_decreasing_index(
+        self, before, after, indices, klass, frame_or_series
+    ):
         # https://github.com/pandas-dev/pandas/issues/33756
         idx = klass([3, 2, 1, 0])
         if klass is pd.DatetimeIndex:
             before = pd.Timestamp(before) if before is not None else None
             after = pd.Timestamp(after) if after is not None else None
             indices = [pd.Timestamp(i) for i in indices]
-        values = pd.DataFrame(range(len(idx)), index=idx)
+        values = frame_or_series(range(len(idx)), index=idx)
         result = values.truncate(before=before, after=after)
         expected = values.loc[indices]
-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)

-    def test_truncate_multiindex(self):
+    def test_truncate_multiindex(self, frame_or_series):
         # GH 34564
         mi = pd.MultiIndex.from_product([[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"])
-        s1 = pd.DataFrame(range(mi.shape[0]), index=mi, columns=["col"])
+        s1 = DataFrame(range(mi.shape[0]), index=mi, columns=["col"])
+        if frame_or_series is Series:
+            s1 = s1["col"]
+
         result = s1.truncate(before=2, after=3)

-        df = pd.DataFrame.from_dict(
+        df = DataFrame.from_dict(
             {"L1": [2, 2, 3, 3], "L2": ["A", "B", "A", "B"], "col": [2, 3, 4, 5]}
         )
         expected = df.set_index(["L1", "L2"])
+        if frame_or_series is Series:
+            expected = expected["col"]

-        tm.assert_frame_equal(result, expected)
+        tm.assert_equal(result, expected)
diff --git a/pandas/tests/frame/methods/test_tz_convert.py b/pandas/tests/frame/methods/test_tz_convert.py
index c70e479723644..ecb30cf11319b 100644
--- a/pandas/tests/frame/methods/test_tz_convert.py
+++ b/pandas/tests/frame/methods/test_tz_convert.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest

-from pandas import DataFrame, Index, MultiIndex, Series, date_range
+from pandas import DataFrame, Index, MultiIndex, date_range
 import pandas._testing as tm
@@ -89,17 +89,16 @@ def test_tz_convert_and_localize(self, fn):
             df = DataFrame(index=l0)
             df = getattr(df, fn)("US/Pacific", level=1)

-    @pytest.mark.parametrize("klass", [Series, DataFrame])
     @pytest.mark.parametrize("copy", [True, False])
-    def test_tz_convert_copy_inplace_mutate(self, copy, klass):
+    def test_tz_convert_copy_inplace_mutate(self, copy, frame_or_series):
         # GH#6326
-        obj = klass(
+        obj = frame_or_series(
             np.arange(0, 5),
             index=date_range("20131027", periods=5, freq="1H", tz="Europe/Berlin"),
         )
         orig = obj.copy()
         result = obj.tz_convert("UTC", copy=copy)
-        expected = klass(np.arange(0, 5), index=obj.index.tz_convert("UTC"))
+        expected = frame_or_series(np.arange(0, 5), index=obj.index.tz_convert("UTC"))
         tm.assert_equal(result, expected)
         tm.assert_equal(obj, orig)
         assert result.index is not obj.index
diff --git a/pandas/tests/frame/methods/test_tz_localize.py b/pandas/tests/frame/methods/test_tz_localize.py
index 183b81ca5298e..aa5ab51fe3d8b 100644
--- a/pandas/tests/frame/methods/test_tz_localize.py
+++ b/pandas/tests/frame/methods/test_tz_localize.py
@@ -1,7 +1,7 @@
 import numpy as np
 import pytest

-from pandas import DataFrame, Series, date_range
+from pandas import DataFrame, date_range
 import pandas._testing as tm
@@ -23,16 +23,15 @@ def test_frame_tz_localize(self):
         assert result.columns.tz.zone == "UTC"
         tm.assert_frame_equal(result, expected.T)

-    @pytest.mark.parametrize("klass", [Series, DataFrame])
     @pytest.mark.parametrize("copy", [True, False])
-    def test_tz_localize_copy_inplace_mutate(self, copy, klass):
+    def test_tz_localize_copy_inplace_mutate(self, copy, frame_or_series):
         # GH#6326
-        obj = klass(
+        obj = frame_or_series(
             np.arange(0, 5), index=date_range("20131027", periods=5, freq="1H", tz=None)
         )
         orig = obj.copy()
         result = obj.tz_localize("UTC", copy=copy)
-        expected = klass(
+        expected = frame_or_series(
             np.arange(0, 5),
             index=date_range("20131027", periods=5, freq="1H", tz="UTC"),
         )
diff --git a/pandas/tests/series/methods/test_asof.py b/pandas/tests/series/methods/test_asof.py
index 4b4ef5ea046be..43d40d53dcd21 100644
--- a/pandas/tests/series/methods/test_asof.py
+++ b/pandas/tests/series/methods/test_asof.py
@@ -161,9 +161,6 @@ def test_errors(self):
     def test_all_nans(self):
         # GH 15713
         # series is all nans
-        result = Series([np.nan]).asof([0])
-        expected = Series([np.nan])
-        tm.assert_series_equal(result, expected)

         # testing non-default indexes
         N = 50
diff --git a/pandas/tests/series/methods/test_droplevel.py b/pandas/tests/series/methods/test_droplevel.py
deleted file mode 100644
index 449ddd1cd0e49..0000000000000
--- a/pandas/tests/series/methods/test_droplevel.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import pytest
-
-from pandas import MultiIndex, Series
-import pandas._testing as tm
-
-
-class TestDropLevel:
-    def test_droplevel(self):
-        # GH#20342
-        ser = Series([1, 2, 3, 4])
-        ser.index = MultiIndex.from_arrays(
-            [(1, 2, 3, 4), (5, 6, 7, 8)], names=["a", "b"]
-        )
-        expected = ser.reset_index("b", drop=True)
-        result = ser.droplevel("b", axis="index")
-        tm.assert_series_equal(result, expected)
-        # test that droplevel raises ValueError on axis != 0
-        with pytest.raises(ValueError, match="No axis named columns"):
-            ser.droplevel(1, axis="columns")
diff --git a/pandas/tests/series/methods/test_first_and_last.py b/pandas/tests/series/methods/test_first_and_last.py
deleted file mode 100644
index 7629dc8cda30b..0000000000000
--- a/pandas/tests/series/methods/test_first_and_last.py
+++ /dev/null
@@ -1,69 +0,0 @@
-"""
-Note: includes tests for `last`
-"""
-
-import numpy as np
-import pytest
-
-from pandas import Series, date_range
-import pandas._testing as tm
-
-
-class TestFirst:
-    def test_first_subset(self):
-        rng = date_range("1/1/2000", "1/1/2010", freq="12h")
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        result = ts.first("10d")
-        assert len(result) == 20
-
-        rng = date_range("1/1/2000", "1/1/2010", freq="D")
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        result = ts.first("10d")
-        assert len(result) == 10
-
-        result = ts.first("3M")
-        expected = ts[:"3/31/2000"]
-        tm.assert_series_equal(result, expected)
-
-        result = ts.first("21D")
-        expected = ts[:21]
-        tm.assert_series_equal(result, expected)
-
-        result = ts[:0].first("3M")
-        tm.assert_series_equal(result, ts[:0])
-
-    def test_first_raises(self):
-        # GH#20725
-        ser = Series("a b c".split())
-        msg = "'first' only supports a DatetimeIndex index"
-        with pytest.raises(TypeError, match=msg):
-            ser.first("1D")
-
-    def test_last_subset(self):
-        rng = date_range("1/1/2000", "1/1/2010", freq="12h")
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        result = ts.last("10d")
-        assert len(result) == 20
-
-        rng = date_range("1/1/2000", "1/1/2010", freq="D")
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        result = ts.last("10d")
-        assert len(result) == 10
-
-        result = ts.last("21D")
-        expected = ts["12/12/2009":]
-        tm.assert_series_equal(result, expected)
-
-        result = ts.last("21D")
-        expected = ts[-21:]
-        tm.assert_series_equal(result, expected)
-
-        result = ts[:0].last("3M")
-        tm.assert_series_equal(result, ts[:0])
-
-    def test_last_raises(self):
-        # GH#20725
-        ser = Series("a b c".split())
-        msg = "'last' only supports a DatetimeIndex index"
-        with pytest.raises(TypeError, match=msg):
-            ser.last("1D")
diff --git a/pandas/tests/series/indexing/test_pop.py b/pandas/tests/series/methods/test_pop.py
similarity index 100%
rename from pandas/tests/series/indexing/test_pop.py
rename to pandas/tests/series/methods/test_pop.py
diff --git a/pandas/tests/series/methods/test_truncate.py b/pandas/tests/series/methods/test_truncate.py
index b03f516eeffc5..21de593c0e2af 100644
--- a/pandas/tests/series/methods/test_truncate.py
+++ b/pandas/tests/series/methods/test_truncate.py
@@ -1,102 +1,11 @@
 from datetime import datetime

-import numpy as np
-import pytest
-
 import pandas as pd
 from pandas import Series, date_range
 import pandas._testing as tm

-from pandas.tseries.offsets import BDay
-

 class TestTruncate:
-    def test_truncate(self, datetime_series):
-        offset = BDay()
-
-        ts = datetime_series[::3]
-
-        start, end = datetime_series.index[3], datetime_series.index[6]
-        start_missing, end_missing = datetime_series.index[2], datetime_series.index[7]
-
-        # neither specified
-        truncated = ts.truncate()
-        tm.assert_series_equal(truncated, ts)
-
-        # both specified
-        expected = ts[1:3]
-
-        truncated = ts.truncate(start, end)
-        tm.assert_series_equal(truncated, expected)
-
-        truncated = ts.truncate(start_missing, end_missing)
-        tm.assert_series_equal(truncated, expected)
-
-        # start specified
-        expected = ts[1:]
-
-        truncated = ts.truncate(before=start)
-        tm.assert_series_equal(truncated, expected)
-
-        truncated = ts.truncate(before=start_missing)
-        tm.assert_series_equal(truncated, expected)
-
-        # end specified
-        expected = ts[:3]
-
-        truncated = ts.truncate(after=end)
-        tm.assert_series_equal(truncated, expected)
-
-        truncated = ts.truncate(after=end_missing)
-        tm.assert_series_equal(truncated, expected)
-
-        # corner case, empty series returned
-        truncated = ts.truncate(after=datetime_series.index[0] - offset)
-        assert len(truncated) == 0
-
-        truncated = ts.truncate(before=datetime_series.index[-1] + offset)
-        assert len(truncated) == 0
-
-        msg = "Truncate: 1999-12-31 00:00:00 must be after 2000-02-14 00:00:00"
-        with pytest.raises(ValueError, match=msg):
-            ts.truncate(
-                before=datetime_series.index[-1] + offset,
-                after=datetime_series.index[0] - offset,
-            )
-
-    def test_truncate_nonsortedindex(self):
-        # GH#17935
-
-        s = Series(["a", "b", "c", "d", "e"], index=[5, 3, 2, 9, 0])
-        msg = "truncate requires a sorted index"
-
-        with pytest.raises(ValueError, match=msg):
-            s.truncate(before=3, after=9)
-
-        rng = pd.date_range("2011-01-01", "2012-01-01", freq="W")
-        ts = Series(np.random.randn(len(rng)), index=rng)
-        msg = "truncate requires a sorted index"
-
-        with pytest.raises(ValueError, match=msg):
-            ts.sort_values(ascending=False).truncate(before="2011-11", after="2011-12")
-
-    @pytest.mark.parametrize(
-        "before, after, indices",
-        [(1, 2, [2, 1]), (None, 2, [2, 1, 0]), (1, None, [3, 2, 1])],
-    )
-    @pytest.mark.parametrize("klass", [pd.Int64Index, pd.DatetimeIndex])
-    def test_truncate_decreasing_index(self, before, after, indices, klass):
-        # https://github.com/pandas-dev/pandas/issues/33756
-        idx = klass([3, 2, 1, 0])
-        if klass is pd.DatetimeIndex:
-            before = pd.Timestamp(before) if before is not None else None
-            after = pd.Timestamp(after) if after is not None else None
-            indices = [pd.Timestamp(i) for i in indices]
-        values = Series(range(len(idx)), index=idx)
-        result = values.truncate(before=before, after=after)
-        expected = values.loc[indices]
-        tm.assert_series_equal(result, expected)
-
     def test_truncate_datetimeindex_tz(self):
         # GH 9243
         idx = date_range("4/1/2005", "4/30/2005", freq="D", tz="US/Pacific")
@@ -133,21 +42,6 @@ def test_truncate_periodindex(self):
         expected_idx2 = pd.PeriodIndex([pd.Period("2017-09-02")])
         tm.assert_series_equal(result2, Series([2], index=expected_idx2))

-    def test_truncate_multiindex(self):
-        # GH 34564
-        mi = pd.MultiIndex.from_product([[1, 2, 3, 4], ["A", "B"]], names=["L1", "L2"])
-        s1 = Series(range(mi.shape[0]), index=mi, name="col")
-        result = s1.truncate(before=2, after=3)
-
-        df = pd.DataFrame.from_dict(
-            {"L1": [2, 2, 3, 3], "L2": ["A", "B", "A", "B"], "col": [2, 3, 4, 5]}
-        )
-        return_value = df.set_index(["L1", "L2"], inplace=True)
-        assert return_value is None
-        expected = df.col
-
-        tm.assert_series_equal(result, expected)
-
     def test_truncate_one_element_series(self):
         # GH 35544
         series = Series([0.1], index=pd.DatetimeIndex(["2020-08-04"]))

From 83c2e651b4b4cfff58298c0090b67a0a3d4db2e1 Mon Sep 17 00:00:00 2001
From: Sven
Date: Wed, 4 Nov 2020 12:55:11 +1100
Subject: [PATCH 06/21] Gh 36562 typeerror comparison not supported between float and str (#37096)

---
 doc/source/whatsnew/v1.2.0.rst                   |  1 +
 pandas/core/algorithms.py                        | 43 ++++++++++++-----
 .../tests/frame/methods/test_combine_first.py    | 31 ++++++++++++-
 pandas/tests/test_sorting.py                     |  7 +++
 4 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 7111d54d65815..ae6e2de1b819c 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -469,6 +469,7 @@ MultiIndex

 - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
 - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
+- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)

 I/O
 ^^^
diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
index e9e04ace784b6..ec88eb817b3f8 100644
--- a/pandas/core/algorithms.py
+++ b/pandas/core/algorithms.py
@@ -2061,27 +2061,25 @@ def safe_sort(
     dtype, _ = infer_dtype_from_array(values)
     values = np.asarray(values, dtype=dtype)

-    def sort_mixed(values):
-        # order ints before strings, safe in py3
-        str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
-        nums = np.sort(values[~str_pos])
-        strs = np.sort(values[str_pos])
-        return np.concatenate([nums, np.asarray(strs, dtype=object)])
-
     sorter = None
+
     if (
         not is_extension_array_dtype(values)
         and lib.infer_dtype(values, skipna=False) == "mixed-integer"
     ):
-        # unorderable in py3 if mixed str/int
-        ordered = sort_mixed(values)
+        ordered = _sort_mixed(values)
     else:
         try:
             sorter = values.argsort()
             ordered = values.take(sorter)
         except TypeError:
-            # try this anyway
-            ordered = sort_mixed(values)
+            # Previous sorters failed or were not applicable, try `_sort_mixed`
+            # which would work, but which fails for special case of 1d arrays
+            # with tuples.
+            if values.size and isinstance(values[0], tuple):
+                ordered = _sort_tuples(values)
+            else:
+                ordered = _sort_mixed(values)

     # codes:

@@ -2128,3 +2126,26 @@ def sort_mixed(values):
     np.putmask(new_codes, mask, na_sentinel)

     return ordered, ensure_platform_int(new_codes)
+
+
+def _sort_mixed(values):
+    """ order ints before strings in 1d arrays, safe in py3 """
+    str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
+    nums = np.sort(values[~str_pos])
+    strs = np.sort(values[str_pos])
+    return np.concatenate([nums, np.asarray(strs, dtype=object)])
+
+
+def _sort_tuples(values: np.ndarray):
+    """
+    Convert array of tuples (1d) to array of arrays (2d).
+    We need to keep the columns separately as they contain different types and
+    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
+    column as types cannot be compared).
+    """
+    from pandas.core.internals.construction import to_arrays
+    from pandas.core.sorting import lexsort_indexer
+
+    arrays, _ = to_arrays(values, None)
+    indexer = lexsort_indexer(arrays, orders=True)
+    return values[indexer]
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
index 4850c6a50f8a8..08c4293323500 100644
--- a/pandas/tests/frame/methods/test_combine_first.py
+++ b/pandas/tests/frame/methods/test_combine_first.py
@@ -4,7 +4,7 @@
 import pytest

 import pandas as pd
-from pandas import DataFrame, Index, Series
+from pandas import DataFrame, Index, MultiIndex, Series
 import pandas._testing as tm


@@ -365,3 +365,32 @@ def test_combine_first_string_dtype_only_na(self):
             {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string"
         ).set_index(["a", "b"])
         tm.assert_frame_equal(result, expected)
+
+
+def test_combine_first_with_nan_multiindex():
+    # gh-36562
+
+    mi1 = MultiIndex.from_arrays(
+        [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
+    )
+    df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
+    mi2 = MultiIndex.from_arrays(
+        [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
+    )
+    s = Series([1, 2, 3, 4, 5, 6], index=mi2)
+    res = df.combine_first(DataFrame({"d": s}))
+    mi_expected = MultiIndex.from_arrays(
+        [
+            ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],
+            [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
+        ],
+        names=["a", "b"],
+    )
+    expected = DataFrame(
+        {
+            "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
+            "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
+        },
+        index=mi_expected,
+    )
+    tm.assert_frame_equal(res, expected)
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
index 1c9fd46ae451f..5f85ae2ec2318 100644
--- a/pandas/tests/test_sorting.py
+++ b/pandas/tests/test_sorting.py
@@ -453,3 +453,10 @@ def test_extension_array_codes(self, verify, na_sentinel):
         expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
         tm.assert_extension_array_equal(result, expected_values)
         tm.assert_numpy_array_equal(codes, expected_codes)
+
+
+def test_mixed_str_nan():
+    values = np.array(["b", np.nan, "a", "b"], dtype=object)
+    result = safe_sort(values)
dtype=object) + tm.assert_numpy_array_equal(result, expected) From 1e69e2c3cab6f66b4f0b782a36a3c0c6ba562108 Mon Sep 17 00:00:00 2001 From: Micael Jarniac Date: Tue, 3 Nov 2020 22:58:12 -0300 Subject: [PATCH 07/21] docs: fix punctuation (#37612) --- pandas/core/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/generic.py b/pandas/core/generic.py index c90ab9cceea8c..8050ce8b1b636 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -2196,7 +2196,7 @@ def to_json( * Series: - default is 'index' - - allowed values are: {'split','records','index','table'}. + - allowed values are: {'split', 'records', 'index', 'table'}. * DataFrame: From 831320f05da1be9c0a3191ac6bb1ef403686cfb1 Mon Sep 17 00:00:00 2001 From: Andrew Wieteska <48889395+arw2019@users.noreply.github.com> Date: Tue, 3 Nov 2020 20:59:21 -0500 Subject: [PATCH 08/21] REGR: pd.to_hdf(..., dropna=True) not dropping missing rows (#37564) --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/io/pytables.py | 3 +++ pandas/tests/io/pytables/test_store.py | 25 ++++++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index ae6e2de1b819c..16e6c12488b83 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -493,6 +493,7 @@ I/O - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`) - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`) +- Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`) Plotting ^^^^^^^^ diff --git a/pandas/io/pytables.py b/pandas/io/pytables.py index 347ce6e853794..bf21a8fe2fc74 100644 --- a/pandas/io/pytables.py +++ b/pandas/io/pytables.py @@ -268,6 +268,7 @@ def to_hdf( data_columns=data_columns, errors=errors, encoding=encoding, + dropna=dropna, ) path_or_buf = stringify_path(path_or_buf) @@ -1051,6 +1052,7 @@ def put( encoding=None, errors: str = "strict", track_times: bool = True, + dropna: bool = False, ): """ Store object in HDFStore. @@ -1100,6 +1102,7 @@ def put( encoding=encoding, errors=errors, track_times=track_times, + dropna=dropna, ) def remove(self, key: str, where=None, start=None, stop=None): diff --git a/pandas/tests/io/pytables/test_store.py b/pandas/tests/io/pytables/test_store.py index f37b0aabd3aed..d76a5a6f64055 100644 --- a/pandas/tests/io/pytables/test_store.py +++ b/pandas/tests/io/pytables/test_store.py @@ -1253,17 +1253,32 @@ def test_append_all_nans(self, setup_path): store.append("df2", df[10:], dropna=False) tm.assert_frame_equal(store["df2"], df) - # Test to make sure defaults are to not drop. - # Corresponding to Issue 9382 + def test_store_dropna(self, setup_path): df_with_missing = DataFrame( - {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]} + {"col1": [0.0, np.nan, 2.0], "col2": [1.0, np.nan, np.nan]}, + index=list("abc"), ) + df_without_missing = DataFrame( + {"col1": [0.0, 2.0], "col2": [1.0, np.nan]}, index=list("ac") + ) + + # # Test to make sure defaults are to not drop. 
+ # # Corresponding to Issue 9382 + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table") + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_with_missing, reloaded) with ensure_clean_path(setup_path) as path: - df_with_missing.to_hdf(path, "df_with_missing", format="table") - reloaded = read_hdf(path, "df_with_missing") + df_with_missing.to_hdf(path, "df", format="table", dropna=False) + reloaded = read_hdf(path, "df") tm.assert_frame_equal(df_with_missing, reloaded) + with ensure_clean_path(setup_path) as path: + df_with_missing.to_hdf(path, "df", format="table", dropna=True) + reloaded = read_hdf(path, "df") + tm.assert_frame_equal(df_without_missing, reloaded) + def test_read_missing_key_close_store(self, setup_path): # GH 25766 with ensure_clean_path(setup_path) as path: From e5cbaec9cc79e48091de1bb533344c137264bc11 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 18:05:28 -0800 Subject: [PATCH 09/21] parametrize set_axis tests (#37619) --- pandas/tests/frame/test_alter_axes.py | 16 ------ pandas/tests/generic/methods/test_set_axis.py | 22 ++++++++ pandas/tests/series/methods/test_set_name.py | 21 +++++++ pandas/tests/series/test_alter_axes.py | 55 ------------------- 4 files changed, 43 insertions(+), 71 deletions(-) create mode 100644 pandas/tests/series/methods/test_set_name.py delete mode 100644 pandas/tests/series/test_alter_axes.py diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 3cd35e900ee06..4bd1d5fa56468 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -1,7 +1,6 @@ from datetime import datetime import numpy as np -import pytest from pandas.core.dtypes.common import ( is_categorical_dtype, @@ -24,15 +23,6 @@ class TestDataFrameAlterAxes: - def test_set_index_directly(self, float_string_frame): - df = float_string_frame - idx = Index(np.arange(len(df))[::-1]) - - df.index = idx - tm.assert_index_equal(df.index, idx) - with pytest.raises(ValueError, match="Length mismatch"): - df.index = idx[::2] - def test_convert_dti_to_series(self): # don't cast a DatetimeIndex WITH a tz, leave as object # GH 6032 @@ -101,12 +91,6 @@ def test_convert_dti_to_series(self): df.pop("ts") tm.assert_frame_equal(df, expected) - def test_set_columns(self, float_string_frame): - cols = Index(np.arange(len(float_string_frame.columns))) - float_string_frame.columns = cols - with pytest.raises(ValueError, match="Length mismatch"): - float_string_frame.columns = cols[::2] - def test_dti_set_index_reindex(self): # GH 6631 df = DataFrame(np.random.random(6)) diff --git a/pandas/tests/generic/methods/test_set_axis.py b/pandas/tests/generic/methods/test_set_axis.py index 278d43ef93d2f..a46a91811f40e 100644 --- a/pandas/tests/generic/methods/test_set_axis.py +++ b/pandas/tests/generic/methods/test_set_axis.py @@ -57,6 +57,28 @@ def test_set_axis_invalid_axis_name(self, axis, obj): with pytest.raises(ValueError, match="No axis named"): obj.set_axis(list("abc"), axis=axis) + def test_set_axis_setattr_index_not_collection(self, obj): + # wrong type + msg = ( + r"Index\(\.\.\.\) must be called with a collection of some " + r"kind, None was passed" + ) + with pytest.raises(TypeError, match=msg): + obj.index = None + + def test_set_axis_setattr_index_wrong_length(self, obj): + # wrong length + msg = ( + f"Length mismatch: Expected axis has {len(obj)} elements, " + f"new values have {len(obj)-1} elements" + ) + with pytest.raises(ValueError, match=msg): 
+ obj.index = np.arange(len(obj) - 1) + + if obj.ndim == 2: + with pytest.raises(ValueError, match="Length mismatch"): + obj.columns = obj.columns[::2] + class TestDataFrameSetAxis(SharedSetAxisTests): @pytest.fixture diff --git a/pandas/tests/series/methods/test_set_name.py b/pandas/tests/series/methods/test_set_name.py new file mode 100644 index 0000000000000..cbc8ebde7a8ab --- /dev/null +++ b/pandas/tests/series/methods/test_set_name.py @@ -0,0 +1,21 @@ +from datetime import datetime + +from pandas import Series + + +class TestSetName: + def test_set_name(self): + ser = Series([1, 2, 3]) + ser2 = ser._set_name("foo") + assert ser2.name == "foo" + assert ser.name is None + assert ser is not ser2 + + def test_set_name_attribute(self): + ser = Series([1, 2, 3]) + ser2 = Series([1, 2, 3], name="bar") + for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: + ser.name = name + assert ser.name == name + ser2.name = name + assert ser2.name == name diff --git a/pandas/tests/series/test_alter_axes.py b/pandas/tests/series/test_alter_axes.py deleted file mode 100644 index 181d7de43d945..0000000000000 --- a/pandas/tests/series/test_alter_axes.py +++ /dev/null @@ -1,55 +0,0 @@ -from datetime import datetime - -import numpy as np -import pytest - -from pandas import Index, Series -import pandas._testing as tm - - -class TestSeriesAlterAxes: - def test_setindex(self, string_series): - # wrong type - msg = ( - r"Index\(\.\.\.\) must be called with a collection of some " - r"kind, None was passed" - ) - with pytest.raises(TypeError, match=msg): - string_series.index = None - - # wrong length - msg = ( - "Length mismatch: Expected axis has 30 elements, " - "new values have 29 elements" - ) - with pytest.raises(ValueError, match=msg): - string_series.index = np.arange(len(string_series) - 1) - - # works - string_series.index = np.arange(len(string_series)) - assert isinstance(string_series.index, Index) - - # Renaming - - def test_set_name_attribute(self): - s = Series([1, 2, 3]) - s2 = Series([1, 2, 3], name="bar") - for name in [7, 7.0, "name", datetime(2001, 1, 1), (1,), "\u05D0"]: - s.name = name - assert s.name == name - s2.name = name - assert s2.name == name - - def test_set_name(self): - s = Series([1, 2, 3]) - s2 = s._set_name("foo") - assert s2.name == "foo" - assert s.name is None - assert s is not s2 - - def test_set_index_makes_timeseries(self): - idx = tm.makeDateIndex(10) - - s = Series(range(10)) - s.index = idx - assert s.index._is_all_dates From 36f026dba6a3e47568379e6463ecd9e00cc1568c Mon Sep 17 00:00:00 2001 From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com> Date: Wed, 4 Nov 2020 09:18:04 +0700 Subject: [PATCH 10/21] CLN: clean color selection in _matplotlib/style (#37203) --- pandas/plotting/_matplotlib/style.py | 280 ++++++++++++++++++++++----- pandas/tests/plotting/test_style.py | 157 +++++++++++++++ 2 files changed, 384 insertions(+), 53 deletions(-) create mode 100644 pandas/tests/plotting/test_style.py diff --git a/pandas/plotting/_matplotlib/style.py b/pandas/plotting/_matplotlib/style.py index b919728971505..b2c7b2610845c 100644 --- a/pandas/plotting/_matplotlib/style.py +++ b/pandas/plotting/_matplotlib/style.py @@ -1,4 +1,14 @@ -# being a bit too dynamic +from typing import ( + TYPE_CHECKING, + Collection, + Dict, + Iterator, + List, + Optional, + Sequence, + Union, + cast, +) import warnings import matplotlib.cm as cm @@ -9,92 +19,256 @@ import pandas.core.common as com +if TYPE_CHECKING: + from matplotlib.colors import Colormap + + +Color = 
Union[str, Sequence[float]]
+

 def get_standard_colors(
-    num_colors: int, colormap=None, color_type: str = "default", color=None
+    num_colors: int,
+    colormap: Optional["Colormap"] = None,
+    color_type: str = "default",
+    color: Optional[Union[Dict[str, Color], Color, Collection[Color]]] = None,
 ):
-    import matplotlib.pyplot as plt
+    """
+    Get standard colors based on `colormap`, `color_type` or `color` inputs.
+
+    Parameters
+    ----------
+    num_colors : int
+        Minimum number of colors to be returned.
+        Ignored if `color` is a dictionary.
+    colormap : :py:class:`matplotlib.colors.Colormap`, optional
+        Matplotlib colormap.
+        When provided, the resulting colors will be derived from the colormap.
+    color_type : {"default", "random"}, optional
+        Type of colors to derive. Used if both `color` and `colormap` are None.
+        Ignored if either `color` or `colormap` are not None.
+    color : dict or str or sequence, optional
+        Color(s) to be used for deriving a sequence of colors.
+        Can either be a dictionary, or a single color (single color string,
+        or sequence of floats representing a single color),
+        or a sequence of colors.
+
+    Returns
+    -------
+    dict or list
+        Standard colors. Can either be a mapping if `color` was a dictionary,
+        or a list of colors with a length of `num_colors` or more.
+
+    Warns
+    -----
+    UserWarning
+        If both `colormap` and `color` are provided.
+        Parameter `color` will override.
+    """
+    if isinstance(color, dict):
+        return color
+
+    colors = _derive_colors(
+        color=color,
+        colormap=colormap,
+        color_type=color_type,
+        num_colors=num_colors,
+    )
+
+    return _cycle_colors(colors, num_colors=num_colors)
+
+
+def _derive_colors(
+    *,
+    color: Optional[Union[Color, Collection[Color]]],
+    colormap: Optional[Union[str, "Colormap"]],
+    color_type: str,
+    num_colors: int,
+) -> List[Color]:
+    """
+    Derive colors from either `colormap`, `color_type` or `color` inputs.
+
+    Get a list of colors either from `colormap`, or from `color`,
+    or from `color_type` (if both `colormap` and `color` are None).
+
+    Parameters
+    ----------
+    color : str or sequence, optional
+        Color(s) to be used for deriving a sequence of colors.
+        Can either be a single color (single color string, or sequence of floats
+        representing a single color), or a sequence of colors.
+    colormap : :py:class:`matplotlib.colors.Colormap`, optional
+        Matplotlib colormap.
+        When provided, the resulting colors will be derived from the colormap.
+    color_type : {"default", "random"}, optional
+        Type of colors to derive. Used if both `color` and `colormap` are None.
+        Ignored if either `color` or `colormap` are not None.
+    num_colors : int
+        Number of colors to be extracted.
+
+    Returns
+    -------
+    list
+        List of colors extracted.
+
+    Warns
+    -----
+    UserWarning
+        If both `colormap` and `color` are provided.
+        Parameter `color` will override.
+    """
     if color is None and colormap is not None:
-        if isinstance(colormap, str):
-            cmap = colormap
-            colormap = cm.get_cmap(colormap)
-            if colormap is None:
-                raise ValueError(f"Colormap {cmap} is not recognized")
-        colors = [colormap(num) for num in np.linspace(0, 1, num=num_colors)]
+        return _get_colors_from_colormap(colormap, num_colors=num_colors)
     elif color is not None:
         if colormap is not None:
             warnings.warn(
                 "'color' and 'colormap' cannot be used simultaneously.
Using 'color'" ) - colors = ( - list(color) - if is_list_like(color) and not isinstance(color, dict) - else color - ) + return _get_colors_from_color(color) else: - if color_type == "default": - # need to call list() on the result to copy so we don't - # modify the global rcParams below - try: - colors = [c["color"] for c in list(plt.rcParams["axes.prop_cycle"])] - except KeyError: - colors = list(plt.rcParams.get("axes.color_cycle", list("bgrcmyk"))) - if isinstance(colors, str): - colors = list(colors) - - colors = colors[0:num_colors] - elif color_type == "random": - - def random_color(column): - """ Returns a random color represented as a list of length 3""" - # GH17525 use common._random_state to avoid resetting the seed - rs = com.random_state(column) - return rs.rand(3).tolist() - - colors = [random_color(num) for num in range(num_colors)] - else: - raise ValueError("color_type must be either 'default' or 'random'") + return _get_colors_from_color_type(color_type, num_colors=num_colors) - if isinstance(colors, str) and _is_single_color(colors): - # GH #36972 - colors = [colors] - # Append more colors by cycling if there is not enough color. - # Extra colors will be ignored by matplotlib if there are more colors - # than needed and nothing needs to be done here. +def _cycle_colors(colors: List[Color], num_colors: int) -> List[Color]: + """Append more colors by cycling if there is not enough color. + + Extra colors will be ignored by matplotlib if there are more colors + than needed and nothing needs to be done here. + """ if len(colors) < num_colors: - try: - multiple = num_colors // len(colors) - 1 - except ZeroDivisionError: - raise ValueError("Invalid color argument: ''") + multiple = num_colors // len(colors) - 1 mod = num_colors % len(colors) - colors += multiple * colors colors += colors[:mod] return colors -def _is_single_color(color: str) -> bool: - """Check if ``color`` is a single color. +def _get_colors_from_colormap( + colormap: Union[str, "Colormap"], + num_colors: int, +) -> List[Color]: + """Get colors from colormap.""" + colormap = _get_cmap_instance(colormap) + return [colormap(num) for num in np.linspace(0, 1, num=num_colors)] + + +def _get_cmap_instance(colormap: Union[str, "Colormap"]) -> "Colormap": + """Get instance of matplotlib colormap.""" + if isinstance(colormap, str): + cmap = colormap + colormap = cm.get_cmap(colormap) + if colormap is None: + raise ValueError(f"Colormap {cmap} is not recognized") + return colormap + + +def _get_colors_from_color( + color: Union[Color, Collection[Color]], +) -> List[Color]: + """Get colors from user input color.""" + if len(color) == 0: + raise ValueError(f"Invalid color argument: {color}") + + if _is_single_color(color): + color = cast(Color, color) + return [color] + + color = cast(Collection[Color], color) + return list(_gen_list_of_colors_from_iterable(color)) + + +def _is_single_color(color: Union[Color, Collection[Color]]) -> bool: + """Check if `color` is a single color, not a sequence of colors. + + Single color is of these kinds: + - Named color "red", "C0", "firebrick" + - Alias "g" + - Sequence of floats, such as (0.1, 0.2, 0.3) or (0.1, 0.2, 0.3, 0.4). 
+ + See Also + -------- + _is_single_string_color + """ + if isinstance(color, str) and _is_single_string_color(color): + # GH #36972 + return True + + if _is_floats_color(color): + return True + + return False + + +def _gen_list_of_colors_from_iterable(color: Collection[Color]) -> Iterator[Color]: + """ + Yield colors from string of several letters or from collection of colors. + """ + for x in color: + if _is_single_color(x): + yield x + else: + raise ValueError(f"Invalid color {x}") + + +def _is_floats_color(color: Union[Color, Collection[Color]]) -> bool: + """Check if color comprises a sequence of floats representing color.""" + return bool( + is_list_like(color) + and (len(color) == 3 or len(color) == 4) + and all(isinstance(x, (int, float)) for x in color) + ) + + +def _get_colors_from_color_type(color_type: str, num_colors: int) -> List[Color]: + """Get colors from user input color type.""" + if color_type == "default": + return _get_default_colors(num_colors) + elif color_type == "random": + return _get_random_colors(num_colors) + else: + raise ValueError("color_type must be either 'default' or 'random'") + + +def _get_default_colors(num_colors: int) -> List[Color]: + """Get `num_colors` of default colors from matplotlib rc params.""" + import matplotlib.pyplot as plt + + colors = [c["color"] for c in plt.rcParams["axes.prop_cycle"]] + return colors[0:num_colors] + + +def _get_random_colors(num_colors: int) -> List[Color]: + """Get `num_colors` of random colors.""" + return [_random_color(num) for num in range(num_colors)] + + +def _random_color(column: int) -> List[float]: + """Get a random color represented as a list of length 3""" + # GH17525 use common._random_state to avoid resetting the seed + rs = com.random_state(column) + return rs.rand(3).tolist() + + +def _is_single_string_color(color: Color) -> bool: + """Check if `color` is a single string color. - Examples of single colors: + Examples of single string colors: - 'r' - 'g' - 'red' - 'green' - 'C3' + - 'firebrick' Parameters ---------- - color : string - Color string. + color : Color + Color string or sequence of floats. Returns ------- bool - True if ``color`` looks like a valid color. + True if `color` looks like a valid color. False otherwise. 
""" conv = matplotlib.colors.ColorConverter() diff --git a/pandas/tests/plotting/test_style.py b/pandas/tests/plotting/test_style.py new file mode 100644 index 0000000000000..665bda15724fd --- /dev/null +++ b/pandas/tests/plotting/test_style.py @@ -0,0 +1,157 @@ +import pytest + +from pandas import Series + +pytest.importorskip("matplotlib") +from pandas.plotting._matplotlib.style import get_standard_colors + + +class TestGetStandardColors: + @pytest.mark.parametrize( + "num_colors, expected", + [ + (3, ["red", "green", "blue"]), + (5, ["red", "green", "blue", "red", "green"]), + (7, ["red", "green", "blue", "red", "green", "blue", "red"]), + (2, ["red", "green"]), + (1, ["red"]), + ], + ) + def test_default_colors_named_from_prop_cycle(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color=["red", "green", "blue"]), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["b"]), + (3, ["b", "g", "r"]), + (4, ["b", "g", "r", "y"]), + (5, ["b", "g", "r", "y", "b"]), + (7, ["b", "g", "r", "y", "b", "g", "r"]), + ], + ) + def test_default_colors_named_from_prop_cycle_string(self, num_colors, expected): + import matplotlib as mpl + from matplotlib.pyplot import cycler + + mpl_params = { + "axes.prop_cycle": cycler(color="bgry"), + } + with mpl.rc_context(rc=mpl_params): + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected_name", + [ + (1, ["C0"]), + (3, ["C0", "C1", "C2"]), + ( + 12, + [ + "C0", + "C1", + "C2", + "C3", + "C4", + "C5", + "C6", + "C7", + "C8", + "C9", + "C0", + "C1", + ], + ), + ], + ) + def test_default_colors_named_undefined_prop_cycle(self, num_colors, expected_name): + import matplotlib as mpl + import matplotlib.colors as mcolors + + with mpl.rc_context(rc={}): + expected = [mcolors.to_hex(x) for x in expected_name] + result = get_standard_colors(num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["red", "green", (0.1, 0.2, 0.3)]), + (2, ["red", "green", (0.1, 0.2, 0.3)]), + (3, ["red", "green", (0.1, 0.2, 0.3)]), + (4, ["red", "green", (0.1, 0.2, 0.3), "red"]), + ], + ) + def test_user_input_color_sequence(self, num_colors, expected): + color = ["red", "green", (0.1, 0.2, 0.3)] + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, ["r", "g", "b", "k"]), + (2, ["r", "g", "b", "k"]), + (3, ["r", "g", "b", "k"]), + (4, ["r", "g", "b", "k"]), + (5, ["r", "g", "b", "k", "r"]), + (6, ["r", "g", "b", "k", "r", "g"]), + ], + ) + def test_user_input_color_string(self, num_colors, expected): + color = "rgbk" + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "num_colors, expected", + [ + (1, [(0.1, 0.2, 0.3)]), + (2, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + (3, [(0.1, 0.2, 0.3), (0.1, 0.2, 0.3), (0.1, 0.2, 0.3)]), + ], + ) + def test_user_input_color_floats(self, num_colors, expected): + color = (0.1, 0.2, 0.3) + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize( + "color, num_colors, expected", + [ + ("Crimson", 1, ["Crimson"]), + ("DodgerBlue", 2, ["DodgerBlue", 
"DodgerBlue"]), + ("firebrick", 3, ["firebrick", "firebrick", "firebrick"]), + ], + ) + def test_user_input_named_color_string(self, color, num_colors, expected): + result = get_standard_colors(color=color, num_colors=num_colors) + assert result == expected + + @pytest.mark.parametrize("color", ["", [], (), Series([], dtype="object")]) + def test_empty_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color argument"): + get_standard_colors(color=color, num_colors=1) + + @pytest.mark.parametrize( + "color", + [ + "bad_color", + ("red", "green", "bad_color"), + (0.1,), + (0.1, 0.2), + (0.1, 0.2, 0.3, 0.4, 0.5), # must be either 3 or 4 floats + ], + ) + def test_bad_color_raises(self, color): + with pytest.raises(ValueError, match="Invalid color"): + get_standard_colors(color=color, num_colors=5) From a5aed5d2979428b00e90e586093c69f6e21864ac Mon Sep 17 00:00:00 2001 From: Erfan Nariman <34067903+erfannariman@users.noreply.github.com> Date: Wed, 4 Nov 2020 03:21:00 +0100 Subject: [PATCH 11/21] DEPR: DataFrame/Series.slice_shift (#37601) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/core/generic.py | 13 ++++++++++++- pandas/tests/generic/test_finalize.py | 2 -- pandas/tests/generic/test_generic.py | 11 +++++++++++ 4 files changed, 25 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 16e6c12488b83..fd5451505eefe 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -341,6 +341,8 @@ Deprecations - Deprecate use of strings denoting units with 'M', 'Y' or 'y' in :func:`~pandas.to_timedelta` (:issue:`36666`) - :class:`Index` methods ``&``, ``|``, and ``^`` behaving as the set operations :meth:`Index.intersection`, :meth:`Index.union`, and :meth:`Index.symmetric_difference`, respectively, are deprecated and in the future will behave as pointwise boolean operations matching :class:`Series` behavior. Use the named set methods instead (:issue:`36758`) - :meth:`Categorical.is_dtype_equal` and :meth:`CategoricalIndex.is_dtype_equal` are deprecated, will be removed in a future version (:issue:`37545`) +- :meth:`Series.slice_shift` and :meth:`DataFrame.slice_shift` are deprecated, use :meth:`Series.shift` or :meth:`DataFrame.shift` instead (:issue:`37601`) + .. --------------------------------------------------------------------------- diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 8050ce8b1b636..36ce2c4776bd0 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -9347,10 +9347,13 @@ def shift( def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: """ Equivalent to `shift` without copying data. - The shifted data will not include the dropped periods and the shifted axis will be smaller than the original. + .. deprecated:: 1.2.0 + slice_shift is deprecated, + use DataFrame/Series.shift instead. + Parameters ---------- periods : int @@ -9365,6 +9368,14 @@ def slice_shift(self: FrameOrSeries, periods: int = 1, axis=0) -> FrameOrSeries: While the `slice_shift` is faster than `shift`, you may pay for it later during alignment. """ + + msg = ( + "The 'slice_shift' method is deprecated " + "and will be removed in a future version. 
" + "You can use DataFrame/Series.shift instead" + ) + warnings.warn(msg, FutureWarning, stacklevel=2) + if periods == 0: return self diff --git a/pandas/tests/generic/test_finalize.py b/pandas/tests/generic/test_finalize.py index d7aadda990f53..d16e854c25ed8 100644 --- a/pandas/tests/generic/test_finalize.py +++ b/pandas/tests/generic/test_finalize.py @@ -424,8 +424,6 @@ (pd.DataFrame, frame_data, operator.methodcaller("where", np.array([[True]]))), (pd.Series, ([1, 2],), operator.methodcaller("mask", np.array([True, False]))), (pd.DataFrame, frame_data, operator.methodcaller("mask", np.array([[True]]))), - (pd.Series, ([1, 2],), operator.methodcaller("slice_shift")), - (pd.DataFrame, frame_data, operator.methodcaller("slice_shift")), pytest.param( ( pd.Series, diff --git a/pandas/tests/generic/test_generic.py b/pandas/tests/generic/test_generic.py index 930c48cbdc214..7fde448bb36dc 100644 --- a/pandas/tests/generic/test_generic.py +++ b/pandas/tests/generic/test_generic.py @@ -480,3 +480,14 @@ def test_flags_identity(self, frame_or_series): assert s.flags is s.flags s2 = s.copy() assert s2.flags is not s.flags + + def test_slice_shift_deprecated(self): + # GH 37601 + df = DataFrame({"A": [1, 2, 3, 4]}) + s = Series([1, 2, 3, 4]) + + with tm.assert_produces_warning(FutureWarning): + df["A"].slice_shift() + + with tm.assert_produces_warning(FutureWarning): + s.slice_shift() From e38e987160c792f315685dc74fc1fc33d9389a71 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 3 Nov 2020 18:25:34 -0800 Subject: [PATCH 12/21] REF: re-use validate_setitem_value in Categorical.fillna (#37597) --- pandas/core/arrays/categorical.py | 14 ++++---------- pandas/tests/arrays/categorical/test_missing.py | 5 ++++- pandas/tests/frame/methods/test_fillna.py | 2 +- pandas/tests/indexes/categorical/test_fillna.py | 4 ++-- pandas/tests/series/methods/test_fillna.py | 6 +++--- 5 files changed, 14 insertions(+), 17 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index b1f913e9ea641..9f0414cf7a806 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1655,21 +1655,15 @@ def fillna(self, value=None, method=None, limit=None): codes = self._ndarray.copy() mask = self.isna() + new_codes = self._validate_setitem_value(value) + if isinstance(value, (np.ndarray, Categorical)): # We get ndarray or Categorical if called via Series.fillna, # where it will unwrap another aligned Series before getting here - - not_categories = ~algorithms.isin(value, self.categories) - if not isna(value[not_categories]).all(): - # All entries in `value` must either be a category or NA - raise ValueError("fill value must be in categories") - - values_codes = _get_codes_for_values(value, self.categories) - codes[mask] = values_codes[mask] + codes[mask] = new_codes[mask] else: - new_code = self._validate_fill_value(value) - codes[mask] = new_code + codes[mask] = new_codes return self._from_backing_data(codes) diff --git a/pandas/tests/arrays/categorical/test_missing.py b/pandas/tests/arrays/categorical/test_missing.py index 21bea9356dcf0..364c290edc46c 100644 --- a/pandas/tests/arrays/categorical/test_missing.py +++ b/pandas/tests/arrays/categorical/test_missing.py @@ -60,7 +60,10 @@ def test_set_item_nan(self): ), (dict(), "Must specify a fill 'value' or 'method'."), (dict(method="bad"), "Invalid fill method. 
Expecting .* bad"), - (dict(value=Series([1, 2, 3, 4, "a"])), "fill value must be in categories"), + ( + dict(value=Series([1, 2, 3, 4, "a"])), + "Cannot setitem on a Categorical with a new category", + ), ], ) def test_fillna_raises(self, fillna_kwargs, msg): diff --git a/pandas/tests/frame/methods/test_fillna.py b/pandas/tests/frame/methods/test_fillna.py index 9fa1aa65379c5..bbb57da39705b 100644 --- a/pandas/tests/frame/methods/test_fillna.py +++ b/pandas/tests/frame/methods/test_fillna.py @@ -170,7 +170,7 @@ def test_na_actions_categorical(self): res = df.fillna(value={"cats": 3, "vals": "b"}) tm.assert_frame_equal(res, df_exp_fill) - msg = "'fill_value=4' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): df.fillna(value={"cats": 4, "vals": "c"}) diff --git a/pandas/tests/indexes/categorical/test_fillna.py b/pandas/tests/indexes/categorical/test_fillna.py index f6a6747166011..c8fc55c29054e 100644 --- a/pandas/tests/indexes/categorical/test_fillna.py +++ b/pandas/tests/indexes/categorical/test_fillna.py @@ -14,7 +14,7 @@ def test_fillna_categorical(self): tm.assert_index_equal(idx.fillna(1.0), exp) # fill by value not in categories raises ValueError - msg = "'fill_value=2.0' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): idx.fillna(2.0) @@ -36,7 +36,7 @@ def test_fillna_validates_with_no_nas(self): ci = CategoricalIndex([2, 3, 3]) cat = ci._data - msg = "'fill_value=False' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): ci.fillna(False) diff --git a/pandas/tests/series/methods/test_fillna.py b/pandas/tests/series/methods/test_fillna.py index d45486b9bdb29..aaa58cdb390f7 100644 --- a/pandas/tests/series/methods/test_fillna.py +++ b/pandas/tests/series/methods/test_fillna.py @@ -653,14 +653,14 @@ def test_fillna_categorical_raises(self): data = ["a", np.nan, "b", np.nan, np.nan] ser = Series(Categorical(data, categories=["a", "b"])) - msg = "'fill_value=d' is not present in this Categorical's categories" + msg = "Cannot setitem on a Categorical with a new category" with pytest.raises(ValueError, match=msg): ser.fillna("d") - with pytest.raises(ValueError, match="fill value must be in categories"): + with pytest.raises(ValueError, match=msg): ser.fillna(Series("d")) - with pytest.raises(ValueError, match="fill value must be in categories"): + with pytest.raises(ValueError, match=msg): ser.fillna({1: "d", 3: "a"}) msg = '"value" parameter must be a scalar or dict, but you passed a "list"' From 54dda900a180c099eaa89ff44cea7225c9d93bf0 Mon Sep 17 00:00:00 2001 From: Fangchen Li Date: Tue, 3 Nov 2020 20:57:03 -0600 Subject: [PATCH 13/21] PERF: release gil for ewma_time (#37389) --- pandas/_libs/window/aggregations.pyx | 49 ++++++++++++++++------------ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/pandas/_libs/window/aggregations.pyx b/pandas/_libs/window/aggregations.pyx index b2dbf7802e6f0..3556085bb300b 100644 --- a/pandas/_libs/window/aggregations.pyx +++ b/pandas/_libs/window/aggregations.pyx @@ -1,14 +1,13 @@ # cython: boundscheck=False, wraparound=False, cdivision=True import cython -from cython import Py_ssize_t from libcpp.deque cimport deque import numpy as np cimport numpy as cnp -from numpy cimport float32_t, float64_t, int64_t, ndarray, uint8_t +from 
numpy cimport float32_t, float64_t, int64_t, ndarray

 cnp.import_array()

@@ -1398,7 +1397,7 @@ def roll_weighted_var(float64_t[:] values, float64_t[:] weights,
 # ----------------------------------------------------------------------
 # Exponentially weighted moving average

-def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
+def ewma_time(const float64_t[:] vals, int minp, ndarray[int64_t] times,
               int64_t halflife):
     """
     Compute exponentially-weighted moving average using halflife and time
@@ -1416,30 +1415,40 @@ def ewma_time(ndarray[float64_t] vals, int minp, ndarray[int64_t] times,
     ndarray
     """
     cdef:
-        Py_ssize_t i, num_not_nan = 0, N = len(vals)
+        Py_ssize_t i, j, num_not_nan = 0, N = len(vals)
         bint is_not_nan
-        float64_t last_result
-        ndarray[uint8_t] mask = np.zeros(N, dtype=np.uint8)
-        ndarray[float64_t] weights, observations, output = np.empty(N, dtype=np.float64)
+        float64_t last_result, weights_dot, weights_sum, weight, halflife_float
+        float64_t[:] times_float
+        float64_t[:] observations = np.zeros(N, dtype=float)
+        float64_t[:] times_masked = np.zeros(N, dtype=float)
+        ndarray[float64_t] output = np.empty(N, dtype=float)

     if N == 0:
         return output

+    halflife_float = halflife
+    times_float = times.astype(float)
     last_result = vals[0]

-    for i in range(N):
-        is_not_nan = vals[i] == vals[i]
-        num_not_nan += is_not_nan
-        if is_not_nan:
-            mask[i] = 1
-            weights = 0.5 ** ((times[i] - times[mask.view(np.bool_)]) / halflife)
-            observations = vals[mask.view(np.bool_)]
-            last_result = np.sum(weights * observations) / np.sum(weights)
-
-        if num_not_nan >= minp:
-            output[i] = last_result
-        else:
-            output[i] = NaN
+    with nogil:
+        for i in range(N):
+            is_not_nan = vals[i] == vals[i]
+            num_not_nan += is_not_nan
+            if is_not_nan:
+                times_masked[num_not_nan-1] = times_float[i]
+                observations[num_not_nan-1] = vals[i]
+
+                weights_sum = 0
+                weights_dot = 0
+                for j in range(num_not_nan):
+                    weight = 0.5 ** (
+                        (times_float[i] - times_masked[j]) / halflife_float)
+                    weights_sum += weight
+                    weights_dot += weight * observations[j]
+
+                last_result = weights_dot / weights_sum
+
+            output[i] = last_result if num_not_nan >= minp else NaN

     return output

From f6f3dd3e77278c9932105664a94aaca5c1422880 Mon Sep 17 00:00:00 2001
From: patrick <61934744+phofl@users.noreply.github.com>
Date: Wed, 4 Nov 2020 03:59:02 +0100
Subject: [PATCH 14/21] BUG: Groupby dropped nan groups from result when
 grouping over a single column (#36842)

---
 doc/source/whatsnew/v1.2.0.rst              |  1 +
 pandas/_libs/lib.pyx                        | 29 +++++++++++++--------
 pandas/core/groupby/ops.py                  |  9 +++----
 pandas/core/sorting.py                      | 11 ++++++--
 pandas/tests/groupby/test_groupby.py        |  7 +++++
 pandas/tests/groupby/test_groupby_dropna.py | 20 +++++++++++++-
 pandas/tests/window/test_rolling.py         | 15 +++++++++++
 7 files changed, 72 insertions(+), 20 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index fd5451505eefe..e811bbc9ab7a0 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -527,6 +527,7 @@ Groupby/resample/rolling
 - Using :meth:`Rolling.var()` instead of :meth:`Rolling.std()` avoids numerical issues for :meth:`Rolling.corr()` when :meth:`Rolling.var()` is still within floating point precision while :meth:`Rolling.std()` is not (:issue:`31286`)
 - Bug in :meth:`df.groupby(..).quantile() <pandas.core.groupby.DataFrameGroupBy.quantile>` and :meth:`df.resample(..).quantile() <pandas.core.resample.Resampler.quantile>` raised ``TypeError`` when values were of type ``Timedelta`` (:issue:`29485`)
 - Bug in :meth:`Rolling.median` and :meth:`Rolling.quantile` returned wrong values for
:class:`BaseIndexer` subclasses with non-monotonic starting or ending points for windows (:issue:`37153`) +- Bug in :meth:`DataFrame.groupby` dropped ``nan`` groups from result with ``dropna=False`` when grouping over a single column (:issue:`35646`, :issue:`35542`) Reshaping ^^^^^^^^^ diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx index e493e5e9d41d3..0b0334d52c1e9 100644 --- a/pandas/_libs/lib.pyx +++ b/pandas/_libs/lib.pyx @@ -896,21 +896,28 @@ def indices_fast(ndarray index, const int64_t[:] labels, list keys, if lab != cur: if lab != -1: - tup = PyTuple_New(k) - for j in range(k): - val = keys[j][sorted_labels[j][i - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) - + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][i - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][i - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:i] start = i cur = lab - tup = PyTuple_New(k) - for j in range(k): - val = keys[j][sorted_labels[j][n - 1]] - PyTuple_SET_ITEM(tup, j, val) - Py_INCREF(val) + if k == 1: + # When k = 1 we do not want to return a tuple as key + tup = keys[0][sorted_labels[0][n - 1]] + else: + tup = PyTuple_New(k) + for j in range(k): + val = keys[j][sorted_labels[j][n - 1]] + PyTuple_SET_ITEM(tup, j, val) + Py_INCREF(val) result[tup] = index[start:] return result diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py index bca71b5c9646b..ccf23a6f24c42 100644 --- a/pandas/core/groupby/ops.py +++ b/pandas/core/groupby/ops.py @@ -229,12 +229,9 @@ def apply(self, f: F, data: FrameOrSeries, axis: int = 0): @cache_readonly def indices(self): """ dict {group name -> group indices} """ - if len(self.groupings) == 1: - return self.groupings[0].indices - else: - codes_list = [ping.codes for ping in self.groupings] - keys = [ping.group_index for ping in self.groupings] - return get_indexer_dict(codes_list, keys) + codes_list = [ping.codes for ping in self.groupings] + keys = [ping.group_index for ping in self.groupings] + return get_indexer_dict(codes_list, keys) @property def codes(self) -> List[np.ndarray]: diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 2e32a7572adc7..e390229b5dcba 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -4,6 +4,7 @@ TYPE_CHECKING, Callable, DefaultDict, + Dict, Iterable, List, Optional, @@ -528,16 +529,22 @@ def get_flattened_list( return [tuple(array) for array in arrays.values()] -def get_indexer_dict(label_list, keys): +def get_indexer_dict( + label_list: List[np.ndarray], keys: List["Index"] +) -> Dict[Union[str, Tuple], np.ndarray]: """ Returns ------- - dict + dict: Labels mapped to indexers. 
""" shape = [len(x) for x in keys] group_index = get_group_index(label_list, shape, sort=True, xnull=True) + if np.all(group_index == -1): + # When all keys are nan and dropna=True, indices_fast can't handle this + # and the return is empty anyway + return {} ngroups = ( ((group_index.size and group_index.max()) + 1) if is_int64_overflow_possible(shape) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 2563eeeb68672..a0c228200e73a 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1298,6 +1298,13 @@ def test_groupby_nat_exclude(): grouped.get_group(pd.NaT) +def test_groupby_two_group_keys_all_nan(): + # GH #36842: Grouping over two group keys shouldn't raise an error + df = DataFrame({"a": [np.nan, np.nan], "b": [np.nan, np.nan], "c": [1, 2]}) + result = df.groupby(["a", "b"]).indices + assert result == {} + + def test_groupby_2d_malformed(): d = DataFrame(index=range(2)) d["group"] = ["g1", "g2"] diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py index 29a8f883f0ff5..02ce4dcf2ae2b 100644 --- a/pandas/tests/groupby/test_groupby_dropna.py +++ b/pandas/tests/groupby/test_groupby_dropna.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -import pandas.testing as tm +import pandas._testing as tm @pytest.mark.parametrize( @@ -335,3 +335,21 @@ def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, expected = pd.DataFrame(selected_data, index=mi) tm.assert_frame_equal(result, expected) + + +def test_groupby_nan_included(): + # GH 35646 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = pd.DataFrame(data) + grouped = df.groupby("group", dropna=False) + result = grouped.indices + dtype = "int64" + expected = { + "g1": np.array([0, 2], dtype=dtype), + "g2": np.array([3], dtype=dtype), + np.nan: np.array([1, 4], dtype=dtype), + } + for result_values, expected_values in zip(result.values(), expected.values()): + tm.assert_numpy_array_equal(result_values, expected_values) + assert np.isnan(list(result.keys())[2]) + assert list(result.keys())[0:2] == ["g1", "g2"] diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 2c8439aae75e5..02bcfab8d3388 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -1087,3 +1087,18 @@ def test_rolling_corr_timedelta_index(index, window): result = x.rolling(window).corr(y) expected = Series([np.nan, np.nan, 1, 1, 1], index=index) tm.assert_almost_equal(result, expected) + + +def test_groupby_rolling_nan_included(): + # GH 35542 + data = {"group": ["g1", np.nan, "g1", "g2", np.nan], "B": [0, 1, 2, 3, 4]} + df = DataFrame(data) + result = df.groupby("group", dropna=False).rolling(1, min_periods=1).mean() + expected = DataFrame( + {"B": [0.0, 2.0, 3.0, 1.0, 4.0]}, + index=pd.MultiIndex.from_tuples( + [("g1", 0), ("g1", 2), ("g2", 3), (np.nan, 1), (np.nan, 4)], + names=["group", None], + ), + ) + tm.assert_frame_equal(result, expected) From d3788525ee368db0b6565b72ab4eeba9140fd1d4 Mon Sep 17 00:00:00 2001 From: attack68 <24256554+attack68@users.noreply.github.com> Date: Wed, 4 Nov 2020 04:00:05 +0100 Subject: [PATCH 15/21] ENH: implement timeszones support for read_json(orient='table') and astype() from 'object' (#35973) --- doc/source/whatsnew/v1.2.0.rst | 3 ++ pandas/core/arrays/datetimes.py | 8 ++- pandas/io/json/_json.py | 4 +- pandas/io/json/_table_schema.py | 4 -- 
 pandas/tests/frame/methods/test_astype.py    | 24 +++++++++
 .../tests/io/json/test_json_table_schema.py  | 54 ++++++++++++++++---
 6 files changed, 85 insertions(+), 12 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index e811bbc9ab7a0..0937ec3866e12 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -217,6 +217,7 @@ Other enhancements
 - ``Styler`` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
 - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`)
 - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`)
+-
 - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
 - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
 - Added :meth:`Rolling.sem()` and :meth:`Expanding.sem()` to compute the standard error of mean (:issue:`26476`).
@@ -393,6 +394,8 @@ Datetimelike
 - Bug in :class:`DatetimeIndex.shift` incorrectly raising when shifting empty indexes (:issue:`14811`)
 - :class:`Timestamp` and :class:`DatetimeIndex` comparisons between timezone-aware and timezone-naive objects now follow the standard library ``datetime`` behavior, returning ``True``/``False`` for ``!=``/``==`` and raising for inequality comparisons (:issue:`28507`)
 - Bug in :meth:`DatetimeIndex.equals` and :meth:`TimedeltaIndex.equals` incorrectly considering ``int64`` indexes as equal (:issue:`36744`)
+- :meth:`to_json` and :meth:`read_json` now implement timezone parsing when ``orient='table'``.
+- :meth:`astype` now attempts to convert to ``datetime64[ns, tz]`` directly from ``object`` with inferred timezone from string (:issue:`35973`).
- Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f655d10881011..905242bfdd8ad 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1968,7 +1968,13 @@ def sequence_to_dt64ns( data, inferred_tz = objects_to_datetime64ns( data, dayfirst=dayfirst, yearfirst=yearfirst ) - tz = _maybe_infer_tz(tz, inferred_tz) + if tz and inferred_tz: + # two timezones: convert to intended from base UTC repr + data = tzconversion.tz_convert_from_utc(data.view("i8"), tz) + data = data.view(DT64NS_DTYPE) + elif inferred_tz: + tz = inferred_tz + data_dtype = data.dtype # `data` may have originally been a Categorical[datetime64[ns, tz]], diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 98b9a585d890e..0cc6ca984b25d 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -262,7 +262,9 @@ def __init__( # NotImplemented on a column MultiIndex if obj.ndim == 2 and isinstance(obj.columns, MultiIndex): - raise NotImplementedError("orient='table' is not supported for MultiIndex") + raise NotImplementedError( + "orient='table' is not supported for MultiIndex columns" + ) # TODO: Do this timedelta properly in objToJSON.c See GH #15137 if ( diff --git a/pandas/io/json/_table_schema.py b/pandas/io/json/_table_schema.py index 2b4c86b3c4406..0499a35296490 100644 --- a/pandas/io/json/_table_schema.py +++ b/pandas/io/json/_table_schema.py @@ -323,10 +323,6 @@ def parse_table_schema(json, precise_float): for field in table["schema"]["fields"] } - # Cannot directly use as_type with timezone data on object; raise for now - if any(str(x).startswith("datetime64[ns, ") for x in dtypes.values()): - raise NotImplementedError('table="orient" can not yet read timezone data') - # No ISO constructor for Timedelta as of yet, so need to raise if "timedelta64" in dtypes.values(): raise NotImplementedError( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index d3f256259b15f..f05c90f37ea8a 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -587,3 +587,27 @@ def test_astype_ignores_errors_for_extension_dtypes(self, df, errors): msg = "(Cannot cast)|(could not convert)" with pytest.raises((ValueError, TypeError), match=msg): df.astype(float, errors=errors) + + def test_astype_tz_conversion(self): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + df = DataFrame(val) + result = df.astype({"tz": "datetime64[ns, Europe/Berlin]"}) + + expected = df + expected["tz"] = expected["tz"].dt.tz_convert("Europe/Berlin") + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize("tz", ["UTC", "Europe/Berlin"]) + def test_astype_tz_object_conversion(self, tz): + # GH 35973 + val = {"tz": date_range("2020-08-30", freq="d", periods=2, tz="Europe/London")} + expected = DataFrame(val) + + # convert expected to object dtype from other tz str (independently tested) + result = expected.astype({"tz": f"datetime64[ns, {tz}]"}) + result = result.astype({"tz": "object"}) + + # do real test: object dtype to a specified tz, 
different from construction tz. + result = result.astype({"tz": "datetime64[ns, Europe/London]"}) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index 6e35b224ef4c3..dba4b9214e50c 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -676,6 +676,11 @@ class TestTableOrientReader: {"floats": [1.0, 2.0, 3.0, 4.0]}, {"floats": [1.1, 2.2, 3.3, 4.4]}, {"bools": [True, False, False, True]}, + { + "timezones": pd.date_range( + "2016-01-01", freq="d", periods=4, tz="US/Central" + ) # added in # GH 35973 + }, ], ) @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") @@ -686,22 +691,59 @@ def test_read_json_table_orient(self, index_nm, vals, recwarn): tm.assert_frame_equal(df, result) @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) + @pytest.mark.parametrize( + "vals", + [{"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}], + ) + def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): + df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + out = df.to_json(orient="table") + with pytest.raises(NotImplementedError, match="can not yet read "): + pd.read_json(out, orient="table") + + @pytest.mark.parametrize( + "idx", + [ + pd.Index(range(4)), + pd.Index( + pd.date_range( + "2020-08-30", + freq="d", + periods=4, + ), + freq=None, + ), + pd.Index( + pd.date_range("2020-08-30", freq="d", periods=4, tz="US/Central"), + freq=None, + ), + pd.MultiIndex.from_product( + [ + pd.date_range("2020-08-30", freq="d", periods=2, tz="US/Central"), + ["x", "y"], + ], + ), + ], + ) @pytest.mark.parametrize( "vals", [ - {"timedeltas": pd.timedelta_range("1H", periods=4, freq="T")}, + {"floats": [1.1, 2.2, 3.3, 4.4]}, + {"dates": pd.date_range("2020-08-30", freq="d", periods=4)}, { "timezones": pd.date_range( - "2016-01-01", freq="d", periods=4, tz="US/Central" + "2020-08-30", freq="d", periods=4, tz="Europe/London" ) }, ], ) - def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): - df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) + @pytest.mark.skipif(sys.version_info[:3] == (3, 7, 0), reason="GH-35309") + def test_read_json_table_timezones_orient(self, idx, vals, recwarn): + # GH 35973 + df = DataFrame(vals, index=idx) out = df.to_json(orient="table") - with pytest.raises(NotImplementedError, match="can not yet read "): - pd.read_json(out, orient="table") + result = pd.read_json(out, orient="table") + tm.assert_frame_equal(df, result) def test_comprehensive(self): df = DataFrame( From a648fb2699ef44555b38db89c4af2e97cfcf8208 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Torsten=20W=C3=B6rtwein?= Date: Tue, 3 Nov 2020 23:09:16 -0500 Subject: [PATCH 16/21] REF/BUG/TYP: read_csv shouldn't close user-provided file handles (#36997) * BUG/REF: read_csv shouldn't close user-provided file handles * get_handle: typing, returns is_wrapped, use dataclass, and make sure that all created handlers are returned * remove unused imports * added IOHandleArgs.close * added IOArgs.close * mostly comments * move memory_map from TextReader to CParserWrapper * moved IOArgs and IOHandles * more comments Co-authored-by: Jeff Reback --- doc/source/whatsnew/v1.2.0.rst | 1 + pandas/_libs/parsers.pyx | 122 ++------------ pandas/_typing.py | 29 +--- pandas/core/frame.py | 6 +- pandas/io/common.py | 178 ++++++++++++++++----- pandas/io/excel/_base.py | 40 +++-- pandas/io/feather_format.py | 9 +- 
 pandas/io/formats/csvs.py                    |  54 ++-----
 pandas/io/formats/format.py                  |  14 +-
 pandas/io/json/_json.py                      |  88 +++++-----
 pandas/io/orc.py                             |   1 +
 pandas/io/parsers.py                         |  92 +++++------
 pandas/io/pickle.py                          |  42 ++---
 pandas/io/sas/sas7bdat.py                    |  21 +--
 pandas/io/sas/sas_xport.py                   |  19 +--
 pandas/io/sas/sasreader.py                   |   3 +-
 pandas/io/stata.py                           | 158 ++++++++----------
 pandas/tests/frame/methods/test_to_csv.py    |   7 +-
 pandas/tests/io/json/test_readlines.py       |   2 +-
 pandas/tests/io/parser/test_common.py        |  61 ++++++-
 pandas/tests/io/parser/test_encoding.py      |   4 +
 pandas/tests/io/parser/test_textreader.py    |   7 +-
 pandas/tests/io/test_compression.py          |  26 +--
 pandas/tests/series/methods/test_to_csv.py   |   6 +-
 24 files changed, 480 insertions(+), 510 deletions(-)

diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 0937ec3866e12..33e9bd0c2732a 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -498,6 +498,7 @@ I/O
 - Bug in output rendering of complex numbers showing too many trailing zeros (:issue:`36799`)
 - Bug in :class:`HDFStore` threw a ``TypeError`` when exporting an empty :class:`DataFrame` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
 - Bug in :class:`HDFStore` was dropping timezone information when exporting :class:`Series` with ``datetime64[ns, tz]`` dtypes with a fixed HDF5 store (:issue:`20594`)
+- :func:`read_csv` was closing user-provided binary file handles when ``engine="c"`` and an ``encoding`` was requested (:issue:`36980`)
 - Bug in :meth:`DataFrame.to_hdf` was not dropping missing rows with ``dropna=True`` (:issue:`35719`)

 Plotting
diff --git a/pandas/_libs/parsers.pyx b/pandas/_libs/parsers.pyx
index b87e46f9b6648..4b7a47c5f93c2 100644
--- a/pandas/_libs/parsers.pyx
+++ b/pandas/_libs/parsers.pyx
@@ -1,15 +1,10 @@
 # Copyright (c) 2012, Lambda Foundry, Inc.
 # See LICENSE for the license
-import bz2
 from csv import QUOTE_MINIMAL, QUOTE_NONE, QUOTE_NONNUMERIC
 from errno import ENOENT
-import gzip
-import io
-import os
 import sys
 import time
 import warnings
-import zipfile

 from libc.stdlib cimport free
 from libc.string cimport strcasecmp, strlen, strncpy
@@ -17,7 +12,7 @@ from libc.string cimport strcasecmp, strlen, strncpy
 import cython
 from cython import Py_ssize_t

-from cpython.bytes cimport PyBytes_AsString, PyBytes_FromString
+from cpython.bytes cimport PyBytes_AsString
 from cpython.exc cimport PyErr_Fetch, PyErr_Occurred
 from cpython.object cimport PyObject
 from cpython.ref cimport Py_XDECREF
@@ -67,7 +62,6 @@ from pandas._libs.khash cimport (
     khiter_t,
 )

-from pandas.compat import get_lzma_file, import_lzma
 from pandas.errors import DtypeWarning, EmptyDataError, ParserError, ParserWarning

 from pandas.core.dtypes.common import (
@@ -82,11 +76,10 @@ from pandas.core.dtypes.common import (
 )
 from pandas.core.dtypes.concat import union_categoricals

-lzma = import_lzma()
-
 cdef:
     float64_t INF = np.inf
     float64_t NEGINF = -INF
+    int64_t DEFAULT_CHUNKSIZE = 256 * 1024


 cdef extern from "headers/portable.h":
@@ -275,14 +268,15 @@ cdef extern from "parser/io.h":
                      size_t *bytes_read, int *status)


-DEFAULT_CHUNKSIZE = 256 * 1024
-
-
 cdef class TextReader:
     """

     # source: StringIO or file object

+    .. versionchanged:: 1.2.0
+        removed the 'compression', 'memory_map', and 'encoding' arguments.
+        These arguments are outsourced to CParserWrapper.
+        'source' has to be a file handle.
""" cdef: @@ -299,7 +293,7 @@ cdef class TextReader: cdef public: int64_t leading_cols, table_width, skipfooter, buffer_lines - bint allow_leading_cols, mangle_dupe_cols, memory_map, low_memory + bint allow_leading_cols, mangle_dupe_cols, low_memory bint delim_whitespace object delimiter, converters object na_values @@ -307,8 +301,6 @@ cdef class TextReader: object index_col object skiprows object dtype - object encoding - object compression object usecols list dtype_cast_order set unnamed_cols @@ -321,10 +313,8 @@ cdef class TextReader: header_end=0, index_col=None, names=None, - bint memory_map=False, tokenize_chunksize=DEFAULT_CHUNKSIZE, bint delim_whitespace=False, - compression=None, converters=None, bint skipinitialspace=False, escapechar=None, @@ -332,7 +322,6 @@ cdef class TextReader: quotechar=b'"', quoting=0, lineterminator=None, - encoding=None, comment=None, decimal=b'.', thousands=None, @@ -356,15 +345,7 @@ cdef class TextReader: bint skip_blank_lines=True): # set encoding for native Python and C library - if encoding is not None: - if not isinstance(encoding, bytes): - encoding = encoding.encode('utf-8') - encoding = encoding.lower() - self.c_encoding = encoding - else: - self.c_encoding = NULL - - self.encoding = encoding + self.c_encoding = NULL self.parser = parser_new() self.parser.chunksize = tokenize_chunksize @@ -374,9 +355,6 @@ cdef class TextReader: # For timekeeping self.clocks = [] - self.compression = compression - self.memory_map = memory_map - self.parser.usecols = (usecols is not None) self._setup_parser_source(source) @@ -562,11 +540,6 @@ cdef class TextReader: parser_del(self.parser) def close(self): - # we need to properly close an open derived - # filehandle here, e.g. and UTFRecoder - if self.handle is not None: - self.handle.close() - # also preemptively free all allocated memory parser_free(self.parser) if self.true_set: @@ -614,82 +587,15 @@ cdef class TextReader: cdef: void *ptr - self.parser.cb_io = NULL - self.parser.cb_cleanup = NULL - - if self.compression: - if self.compression == 'gzip': - if isinstance(source, str): - source = gzip.GzipFile(source, 'rb') - else: - source = gzip.GzipFile(fileobj=source) - elif self.compression == 'bz2': - source = bz2.BZ2File(source, 'rb') - elif self.compression == 'zip': - zip_file = zipfile.ZipFile(source) - zip_names = zip_file.namelist() - - if len(zip_names) == 1: - file_name = zip_names.pop() - source = zip_file.open(file_name) - - elif len(zip_names) == 0: - raise ValueError(f'Zero files found in compressed ' - f'zip file {source}') - else: - raise ValueError(f'Multiple files found in compressed ' - f'zip file {zip_names}') - elif self.compression == 'xz': - if isinstance(source, str): - source = get_lzma_file(lzma)(source, 'rb') - else: - source = get_lzma_file(lzma)(filename=source) - else: - raise ValueError(f'Unrecognized compression type: ' - f'{self.compression}') - - if (self.encoding and hasattr(source, "read") and - not hasattr(source, "encoding")): - source = io.TextIOWrapper( - source, self.encoding.decode('utf-8'), newline='') - - self.encoding = b'utf-8' - self.c_encoding = self.encoding - - self.handle = source - - if isinstance(source, str): - encoding = sys.getfilesystemencoding() or "utf-8" - usource = source - source = source.encode(encoding) - - if self.memory_map: - ptr = new_mmap(source) - if ptr == NULL: - # fall back - ptr = new_file_source(source, self.parser.chunksize) - self.parser.cb_io = &buffer_file_bytes - self.parser.cb_cleanup = &del_file_source - else: - self.parser.cb_io 
= &buffer_mmap_bytes - self.parser.cb_cleanup = &del_mmap - else: - ptr = new_file_source(source, self.parser.chunksize) - self.parser.cb_io = &buffer_file_bytes - self.parser.cb_cleanup = &del_file_source - self.parser.source = ptr - - elif hasattr(source, 'read'): - # e.g., StringIO - - ptr = new_rd_source(source) - self.parser.source = ptr - self.parser.cb_io = &buffer_rd_bytes - self.parser.cb_cleanup = &del_rd_source - else: + if not hasattr(source, "read"): raise IOError(f'Expected file path name or file-like object, ' f'got {type(source)} type') + ptr = new_rd_source(source) + self.parser.source = ptr + self.parser.cb_io = &buffer_rd_bytes + self.parser.cb_cleanup = &del_rd_source + cdef _get_header(self): # header is now a list of lists, so field_count should use header[0] diff --git a/pandas/_typing.py b/pandas/_typing.py index 3376559fb23ff..3e89cf24632e2 100644 --- a/pandas/_typing.py +++ b/pandas/_typing.py @@ -1,6 +1,6 @@ -from dataclasses import dataclass from datetime import datetime, timedelta, tzinfo -from io import IOBase +from io import BufferedIOBase, RawIOBase, TextIOBase, TextIOWrapper +from mmap import mmap from pathlib import Path from typing import ( IO, @@ -10,7 +10,6 @@ Callable, Collection, Dict, - Generic, Hashable, List, Mapping, @@ -77,8 +76,6 @@ "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]] ] DtypeObj = Union[np.dtype, "ExtensionDtype"] -FilePathOrBuffer = Union[str, Path, IO[AnyStr], IOBase] -FileOrBuffer = Union[str, IO[AnyStr], IOBase] # FrameOrSeriesUnion means either a DataFrame or a Series. E.g. # `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series @@ -133,6 +130,10 @@ "Resampler", ] +# filenames and file-like-objects +Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap] +FileOrBuffer = Union[str, Buffer[T]] +FilePathOrBuffer = Union[Path, FileOrBuffer[T]] # for arbitrary kwargs passed during reading/writing files StorageOptions = Optional[Dict[str, Any]] @@ -150,21 +151,3 @@ # type of float formatter in DataFrameFormatter FloatFormatType = Union[str, Callable, "EngFormatter"] - - -@dataclass -class IOargs(Generic[ModeVar, EncodingVar]): - """ - Return value of io/common.py:get_filepath_or_buffer. - - Note (copy&past from io/parsers): - filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] - though mypy handling of conditional imports is difficult. 
-    See https://github.com/python/mypy/issues/1297
-    """
-
-    filepath_or_buffer: FileOrBuffer
-    encoding: EncodingVar
-    compression: CompressionDict
-    should_close: bool
-    mode: Union[ModeVar, str]
diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 24b89085ac121..a3130ec27713d 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -15,6 +15,7 @@
 import datetime
 from io import StringIO
 import itertools
+import mmap
 from textwrap import dedent
 from typing import (
     IO,
@@ -2286,10 +2287,9 @@ def to_markdown(
         if buf is None:
             return result
         ioargs = get_filepath_or_buffer(buf, mode=mode, storage_options=storage_options)
-        assert not isinstance(ioargs.filepath_or_buffer, str)
+        assert not isinstance(ioargs.filepath_or_buffer, (str, mmap.mmap))
         ioargs.filepath_or_buffer.writelines(result)
-        if ioargs.should_close:
-            ioargs.filepath_or_buffer.close()
+        ioargs.close()
         return None
 
     @deprecate_kwarg(old_arg_name="fname", new_arg_name="path")
diff --git a/pandas/io/common.py b/pandas/io/common.py
index c147ae9fd0aa8..90a79e54015c4 100644
--- a/pandas/io/common.py
+++ b/pandas/io/common.py
@@ -2,8 +2,9 @@
 
 import bz2
 from collections import abc
+import dataclasses
 import gzip
-from io import BufferedIOBase, BytesIO, RawIOBase
+from io import BufferedIOBase, BytesIO, RawIOBase, TextIOWrapper
 import mmap
 import os
 import pathlib
@@ -13,12 +14,14 @@
     Any,
     AnyStr,
     Dict,
+    Generic,
     List,
     Mapping,
     Optional,
     Tuple,
     Type,
     Union,
+    cast,
 )
 from urllib.parse import (
     urljoin,
@@ -31,12 +34,12 @@
 import zipfile
 
 from pandas._typing import (
+    Buffer,
     CompressionDict,
     CompressionOptions,
     EncodingVar,
     FileOrBuffer,
     FilePathOrBuffer,
-    IOargs,
     ModeVar,
     StorageOptions,
 )
@@ -56,6 +59,76 @@
     from io import IOBase
 
 
+@dataclasses.dataclass
+class IOArgs(Generic[ModeVar, EncodingVar]):
+    """
+    Return value of io/common.py:get_filepath_or_buffer.
+
+    This is used to easily close created fsspec objects.
+
+    Note (copy&paste from io/parsers):
+    filepath_or_buffer can be Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile]
+    though mypy handling of conditional imports is difficult.
+    See https://github.com/python/mypy/issues/1297
+    """
+
+    filepath_or_buffer: FileOrBuffer
+    encoding: EncodingVar
+    mode: Union[ModeVar, str]
+    compression: CompressionDict
+    should_close: bool = False
+
+    def close(self) -> None:
+        """
+        Close the buffer if it was created by get_filepath_or_buffer.
+        """
+        if self.should_close:
+            assert not isinstance(self.filepath_or_buffer, str)
+            try:
+                self.filepath_or_buffer.close()
+            except (OSError, ValueError):
+                pass
+        self.should_close = False
+
+
+@dataclasses.dataclass
+class IOHandles:
+    """
+    Return value of io/common.py:get_handle.
+
+    This is used to easily close created buffers and to handle corner cases when
+    TextIOWrapper is inserted.
+
+    handle: The file handle to be used.
+    created_handles: All file handles that are created by get_handle
+    is_wrapped: Whether a TextIOWrapper needs to be detached.
+    """
+
+    handle: Buffer
+    created_handles: List[Buffer] = dataclasses.field(default_factory=list)
+    is_wrapped: bool = False
+
+    def close(self) -> None:
+        """
+        Close all created buffers.
+
+        Note: If a TextIOWrapper was inserted, it is flushed and detached to
+        avoid closing the potentially user-created buffer.
+ """ + if self.is_wrapped: + assert isinstance(self.handle, TextIOWrapper) + self.handle.flush() + self.handle.detach() + self.created_handles.remove(self.handle) + try: + for handle in self.created_handles: + handle.close() + except (OSError, ValueError): + pass + self.created_handles = [] + self.is_wrapped = False + + def is_url(url) -> bool: """ Check to see if a URL has a valid protocol. @@ -176,7 +249,7 @@ def get_filepath_or_buffer( compression: CompressionOptions = None, mode: ModeVar = None, # type: ignore[assignment] storage_options: StorageOptions = None, -) -> IOargs[ModeVar, EncodingVar]: +) -> IOArgs[ModeVar, EncodingVar]: """ If the filepath_or_buffer is a url, translate and return the buffer. Otherwise passthrough. @@ -201,7 +274,7 @@ def get_filepath_or_buffer( ..versionchange:: 1.2.0 - Returns the dataclass IOargs. + Returns the dataclass IOArgs. """ filepath_or_buffer = stringify_path(filepath_or_buffer) @@ -225,6 +298,10 @@ def get_filepath_or_buffer( compression = dict(compression, method=compression_method) + # uniform encoding names + if encoding is not None: + encoding = encoding.replace("_", "-").lower() + # bz2 and xz do not write the byte order mark for utf-16 and utf-32 # print a warning when writing such files if ( @@ -258,7 +335,7 @@ def get_filepath_or_buffer( compression = {"method": "gzip"} reader = BytesIO(req.read()) req.close() - return IOargs( + return IOArgs( filepath_or_buffer=reader, encoding=encoding, compression=compression, @@ -310,7 +387,7 @@ def get_filepath_or_buffer( filepath_or_buffer, mode=fsspec_mode, **(storage_options or {}) ).open() - return IOargs( + return IOArgs( filepath_or_buffer=file_obj, encoding=encoding, compression=compression, @@ -323,7 +400,7 @@ def get_filepath_or_buffer( ) if isinstance(filepath_or_buffer, (str, bytes, mmap.mmap)): - return IOargs( + return IOArgs( filepath_or_buffer=_expand_user(filepath_or_buffer), encoding=encoding, compression=compression, @@ -335,7 +412,7 @@ def get_filepath_or_buffer( msg = f"Invalid file path or buffer object type: {type(filepath_or_buffer)}" raise ValueError(msg) - return IOargs( + return IOArgs( filepath_or_buffer=filepath_or_buffer, encoding=encoding, compression=compression, @@ -455,14 +532,14 @@ def infer_compression( def get_handle( - path_or_buf, + path_or_buf: FilePathOrBuffer, mode: str, - encoding=None, + encoding: Optional[str] = None, compression: CompressionOptions = None, memory_map: bool = False, is_text: bool = True, - errors=None, -): + errors: Optional[str] = None, +) -> IOHandles: """ Get file handle for given path/buffer and mode. @@ -506,14 +583,9 @@ def get_handle( See the errors argument for :func:`open` for a full list of options. - .. versionadded:: 1.1.0 + .. versionchanged:: 1.2.0 - Returns - ------- - f : file-like - A file-like object. - handles : list of file-like objects - A list of file-like object that were opened in this function. + Returns the dataclass IOHandles """ need_text_wrapping: Tuple[Type["IOBase"], ...] try: @@ -532,12 +604,16 @@ def get_handle( except ImportError: pass - handles: List[Union[IO, _MMapWrapper]] = list() - f = path_or_buf + handles: List[Buffer] = list() + + # Windows does not default to utf-8. 
Set to utf-8 for a consistent behavior + if encoding is None: + encoding = "utf-8" # Convert pathlib.Path/py.path.local or string path_or_buf = stringify_path(path_or_buf) is_path = isinstance(path_or_buf, str) + f = path_or_buf compression, compression_args = get_compression_method(compression) if is_path: @@ -548,25 +624,29 @@ def get_handle( # GZ Compression if compression == "gzip": if is_path: + assert isinstance(path_or_buf, str) f = gzip.GzipFile(filename=path_or_buf, mode=mode, **compression_args) else: - f = gzip.GzipFile(fileobj=path_or_buf, mode=mode, **compression_args) + f = gzip.GzipFile( + fileobj=path_or_buf, # type: ignore[arg-type] + mode=mode, + **compression_args, + ) # BZ Compression elif compression == "bz2": - f = bz2.BZ2File(path_or_buf, mode=mode, **compression_args) + f = bz2.BZ2File( + path_or_buf, mode=mode, **compression_args # type: ignore[arg-type] + ) # ZIP Compression elif compression == "zip": - zf = _BytesZipFile(path_or_buf, mode, **compression_args) - # Ensure the container is closed as well. - handles.append(zf) - if zf.mode == "w": - f = zf - elif zf.mode == "r": - zip_names = zf.namelist() + f = _BytesZipFile(path_or_buf, mode, **compression_args) + if f.mode == "r": + handles.append(f) + zip_names = f.namelist() if len(zip_names) == 1: - f = zf.open(zip_names.pop()) + f = f.open(zip_names.pop()) elif len(zip_names) == 0: raise ValueError(f"Zero files found in ZIP file {path_or_buf}") else: @@ -584,36 +664,40 @@ def get_handle( msg = f"Unrecognized compression type: {compression}" raise ValueError(msg) + assert not isinstance(f, str) handles.append(f) elif is_path: # Check whether the filename is to be opened in binary mode. # Binary mode does not support 'encoding' and 'newline'. is_binary_mode = "b" in mode - + assert isinstance(path_or_buf, str) if encoding and not is_binary_mode: # Encoding f = open(path_or_buf, mode, encoding=encoding, errors=errors, newline="") - elif is_text and not is_binary_mode: - # No explicit encoding - f = open(path_or_buf, mode, errors="replace", newline="") else: # Binary mode f = open(path_or_buf, mode) handles.append(f) # Convert BytesIO or file objects passed with an encoding - if is_text and (compression or isinstance(f, need_text_wrapping)): - from io import TextIOWrapper - - g = TextIOWrapper(f, encoding=encoding, errors=errors, newline="") - if not isinstance(f, (BufferedIOBase, RawIOBase)): - handles.append(g) - f = g + is_wrapped = False + if is_text and ( + compression + or isinstance(f, need_text_wrapping) + or "b" in getattr(f, "mode", "") + ): + f = TextIOWrapper( + f, encoding=encoding, errors=errors, newline="" # type: ignore[arg-type] + ) + handles.append(f) + # do not mark as wrapped when the user provided a string + is_wrapped = not is_path if memory_map and hasattr(f, "fileno"): + assert not isinstance(f, str) try: - wrapped = _MMapWrapper(f) + wrapped = cast(mmap.mmap, _MMapWrapper(f)) # type: ignore[arg-type] f.close() handles.remove(f) handles.append(wrapped) @@ -625,7 +709,13 @@ def get_handle( # leave the file handler as is then pass - return f, handles + handles.reverse() # close the most recently added buffer first + assert not isinstance(f, str) + return IOHandles( + handle=f, + created_handles=handles, + is_wrapped=is_wrapped, + ) # error: Definition of "__exit__" in base class "ZipFile" is incompatible with diff --git a/pandas/io/excel/_base.py b/pandas/io/excel/_base.py index 3461652f4ea24..03c61c3ed8376 100644 --- a/pandas/io/excel/_base.py +++ b/pandas/io/excel/_base.py @@ -17,6 +17,7 @@ 
from pandas.core.frame import DataFrame from pandas.io.common import ( + IOArgs, get_filepath_or_buffer, is_url, stringify_path, @@ -349,24 +350,37 @@ def read_excel( class BaseExcelReader(metaclass=abc.ABCMeta): def __init__(self, filepath_or_buffer, storage_options: StorageOptions = None): + self.ioargs = IOArgs( + filepath_or_buffer=filepath_or_buffer, + encoding=None, + mode=None, + compression={"method": None}, + ) # If filepath_or_buffer is a url, load the data into a BytesIO if is_url(filepath_or_buffer): - filepath_or_buffer = BytesIO(urlopen(filepath_or_buffer).read()) + self.ioargs = IOArgs( + filepath_or_buffer=BytesIO(urlopen(filepath_or_buffer).read()), + should_close=True, + encoding=None, + mode=None, + compression={"method": None}, + ) elif not isinstance(filepath_or_buffer, (ExcelFile, self._workbook_class)): - filepath_or_buffer = get_filepath_or_buffer( + self.ioargs = get_filepath_or_buffer( filepath_or_buffer, storage_options=storage_options - ).filepath_or_buffer + ) - if isinstance(filepath_or_buffer, self._workbook_class): - self.book = filepath_or_buffer - elif hasattr(filepath_or_buffer, "read"): + if isinstance(self.ioargs.filepath_or_buffer, self._workbook_class): + self.book = self.ioargs.filepath_or_buffer + elif hasattr(self.ioargs.filepath_or_buffer, "read"): # N.B. xlrd.Book has a read attribute too - filepath_or_buffer.seek(0) - self.book = self.load_workbook(filepath_or_buffer) - elif isinstance(filepath_or_buffer, str): - self.book = self.load_workbook(filepath_or_buffer) - elif isinstance(filepath_or_buffer, bytes): - self.book = self.load_workbook(BytesIO(filepath_or_buffer)) + assert not isinstance(self.ioargs.filepath_or_buffer, str) + self.ioargs.filepath_or_buffer.seek(0) + self.book = self.load_workbook(self.ioargs.filepath_or_buffer) + elif isinstance(self.ioargs.filepath_or_buffer, str): + self.book = self.load_workbook(self.ioargs.filepath_or_buffer) + elif isinstance(self.ioargs.filepath_or_buffer, bytes): + self.book = self.load_workbook(BytesIO(self.ioargs.filepath_or_buffer)) else: raise ValueError( "Must explicitly set engine if not passing in buffer or path for io." @@ -382,7 +396,7 @@ def load_workbook(self, filepath_or_buffer): pass def close(self): - pass + self.ioargs.close() @property @abc.abstractmethod diff --git a/pandas/io/feather_format.py b/pandas/io/feather_format.py index 9a42b8289ab47..198acd5862d45 100644 --- a/pandas/io/feather_format.py +++ b/pandas/io/feather_format.py @@ -81,9 +81,7 @@ def to_feather( feather.write_feather(df, ioargs.filepath_or_buffer, **kwargs) - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() def read_feather( @@ -137,9 +135,6 @@ def read_feather( ioargs.filepath_or_buffer, columns=columns, use_threads=bool(use_threads) ) - # s3fs only validates the credentials when the file is closed. 
- if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() return df diff --git a/pandas/io/formats/csvs.py b/pandas/io/formats/csvs.py index 6c62d6825bc84..20226dbb3c9d4 100644 --- a/pandas/io/formats/csvs.py +++ b/pandas/io/formats/csvs.py @@ -3,7 +3,6 @@ """ import csv as csvlib -from io import StringIO, TextIOWrapper import os from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Union @@ -39,7 +38,7 @@ class CSVFormatter: def __init__( self, formatter: "DataFrameFormatter", - path_or_buf: Optional[FilePathOrBuffer[str]] = None, + path_or_buf: FilePathOrBuffer[str] = "", sep: str = ",", cols: Optional[Sequence[Label]] = None, index_label: Optional[IndexLabel] = None, @@ -60,25 +59,14 @@ def __init__( self.obj = self.fmt.frame - self.encoding = encoding or "utf-8" - - if path_or_buf is None: - path_or_buf = StringIO() - - ioargs = get_filepath_or_buffer( + self.ioargs = get_filepath_or_buffer( path_or_buf, - encoding=self.encoding, + encoding=encoding, compression=compression, mode=mode, storage_options=storage_options, ) - self.compression = ioargs.compression.pop("method") - self.compression_args = ioargs.compression - self.path_or_buf = ioargs.filepath_or_buffer - self.should_close = ioargs.should_close - self.mode = ioargs.mode - self.sep = sep self.index_label = self._initialize_index_label(index_label) self.errors = errors @@ -238,20 +226,19 @@ def save(self) -> None: """ Create the writer & save. """ - # get a handle or wrap an existing handle to take care of 1) compression and - # 2) text -> byte conversion - f, handles = get_handle( - self.path_or_buf, - self.mode, - encoding=self.encoding, + # apply compression and byte/text conversion + handles = get_handle( + self.ioargs.filepath_or_buffer, + self.ioargs.mode, + encoding=self.ioargs.encoding, errors=self.errors, - compression=dict(self.compression_args, method=self.compression), + compression=self.ioargs.compression, ) try: # Note: self.encoding is irrelevant here self.writer = csvlib.writer( - f, + handles.handle, # type: ignore[arg-type] lineterminator=self.line_terminator, delimiter=self.sep, quoting=self.quoting, @@ -263,23 +250,10 @@ def save(self) -> None: self._save() finally: - if self.should_close: - f.close() - elif ( - isinstance(f, TextIOWrapper) - and not f.closed - and f != self.path_or_buf - and hasattr(self.path_or_buf, "write") - ): - # get_handle uses TextIOWrapper for non-binary handles. TextIOWrapper - # closes the wrapped handle if it is not detached. 
- f.flush() # make sure everything is written - f.detach() # makes f unusable - del f - elif f != self.path_or_buf: - f.close() - for _fh in handles: - _fh.close() + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + self.ioargs.close() def _save(self) -> None: if self._need_to_save_header: diff --git a/pandas/io/formats/format.py b/pandas/io/formats/format.py index 3c759f477899b..43e76d0aef490 100644 --- a/pandas/io/formats/format.py +++ b/pandas/io/formats/format.py @@ -1046,8 +1046,12 @@ def to_csv( """ from pandas.io.formats.csvs import CSVFormatter + created_buffer = path_or_buf is None + if created_buffer: + path_or_buf = StringIO() + csv_formatter = CSVFormatter( - path_or_buf=path_or_buf, + path_or_buf=path_or_buf, # type: ignore[arg-type] line_terminator=line_terminator, sep=sep, encoding=encoding, @@ -1067,9 +1071,11 @@ def to_csv( ) csv_formatter.save() - if path_or_buf is None: - assert isinstance(csv_formatter.path_or_buf, StringIO) - return csv_formatter.path_or_buf.getvalue() + if created_buffer: + assert isinstance(path_or_buf, StringIO) + content = path_or_buf.getvalue() + path_or_buf.close() + return content return None diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 0cc6ca984b25d..040279b9f3e67 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1,10 +1,10 @@ from abc import ABC, abstractmethod from collections import abc import functools -from io import BytesIO, StringIO +from io import StringIO from itertools import islice import os -from typing import IO, Any, Callable, List, Mapping, Optional, Tuple, Type, Union +from typing import Any, Callable, Mapping, Optional, Tuple, Type, Union import numpy as np @@ -26,7 +26,12 @@ from pandas.core.generic import NDFrame from pandas.core.reshape.concat import concat -from pandas.io.common import get_compression_method, get_filepath_or_buffer, get_handle +from pandas.io.common import ( + IOHandles, + get_compression_method, + get_filepath_or_buffer, + get_handle, +) from pandas.io.json._normalize import convert_to_line_delimits from pandas.io.json._table_schema import build_table_schema, parse_table_schema from pandas.io.parsers import validate_integer @@ -59,17 +64,6 @@ def to_json( "'index=False' is only valid when 'orient' is 'split' or 'table'" ) - if path_or_buf is not None: - ioargs = get_filepath_or_buffer( - path_or_buf, - compression=compression, - mode="wt", - storage_options=storage_options, - ) - path_or_buf = ioargs.filepath_or_buffer - should_close = ioargs.should_close - compression = ioargs.compression - if lines and orient != "records": raise ValueError("'lines' keyword only valid when 'orient' is records") @@ -101,20 +95,27 @@ def to_json( if lines: s = convert_to_line_delimits(s) - if isinstance(path_or_buf, str): - fh, handles = get_handle(path_or_buf, "w", compression=compression) + if path_or_buf is not None: + # open fsspec URLs + ioargs = get_filepath_or_buffer( + path_or_buf, + compression=compression, + mode="wt", + storage_options=storage_options, + ) + # apply compression and byte/text conversion + handles = get_handle( + ioargs.filepath_or_buffer, "w", compression=ioargs.compression + ) try: - fh.write(s) + handles.handle.write(s) finally: - fh.close() - for handle in handles: - handle.close() - elif path_or_buf is None: - return s + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + ioargs.close() else: - path_or_buf.write(s) - if should_close: - path_or_buf.close() + 
return s class Writer(ABC): @@ -547,12 +548,10 @@ def read_json( dtype = True if convert_axes is None and orient != "table": convert_axes = True - if encoding is None: - encoding = "utf-8" ioargs = get_filepath_or_buffer( path_or_buf, - encoding=encoding, + encoding=encoding or "utf-8", compression=compression, storage_options=storage_options, ) @@ -579,9 +578,7 @@ def read_json( return json_reader result = json_reader.read() - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - ioargs.filepath_or_buffer.close() + ioargs.close() return result @@ -631,9 +628,8 @@ def __init__( self.lines = lines self.chunksize = chunksize self.nrows_seen = 0 - self.should_close = False self.nrows = nrows - self.file_handles: List[IO] = [] + self.handles: Optional[IOHandles] = None if self.chunksize is not None: self.chunksize = validate_integer("chunksize", self.chunksize, 1) @@ -672,30 +668,25 @@ def _get_data_from_filepath(self, filepath_or_buffer): This method turns (1) into (2) to simplify the rest of the processing. It returns input types (2) and (3) unchanged. """ - data = filepath_or_buffer - + # if it is a string but the file does not exist, it might be a JSON string exists = False - if isinstance(data, str): + if isinstance(filepath_or_buffer, str): try: exists = os.path.exists(filepath_or_buffer) # gh-5874: if the filepath is too long will raise here except (TypeError, ValueError): pass - if exists or self.compression["method"] is not None: - data, self.file_handles = get_handle( + if exists or not isinstance(filepath_or_buffer, str): + self.handles = get_handle( filepath_or_buffer, "r", encoding=self.encoding, compression=self.compression, ) - self.should_close = True - self.open_stream = data - - if isinstance(data, BytesIO): - data = data.getvalue().decode() + filepath_or_buffer = self.handles.handle - return data + return filepath_or_buffer def _combine_lines(self, lines) -> str: """ @@ -759,13 +750,8 @@ def close(self): If an open stream or file was passed, we leave it open. 
""" - if self.should_close: - try: - self.open_stream.close() - except (OSError, AttributeError): - pass - for file_handle in self.file_handles: - file_handle.close() + if self.handles is not None: + self.handles.close() def __next__(self): if self.nrows: diff --git a/pandas/io/orc.py b/pandas/io/orc.py index 829ff6408d86d..5a734f0878a0c 100644 --- a/pandas/io/orc.py +++ b/pandas/io/orc.py @@ -53,4 +53,5 @@ def read_orc( ioargs = get_filepath_or_buffer(path) orc_file = pyarrow.orc.ORCFile(ioargs.filepath_or_buffer) result = orc_file.read(columns=columns, **kwargs).to_pandas() + ioargs.close() return result diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 2110a2d400be8..3b72869188344 100644 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -5,7 +5,7 @@ from collections import abc, defaultdict import csv import datetime -from io import StringIO, TextIOWrapper +from io import StringIO import itertools import re import sys @@ -63,7 +63,13 @@ from pandas.core.series import Series from pandas.core.tools import datetimes as tools -from pandas.io.common import get_filepath_or_buffer, get_handle, validate_header_arg +from pandas.io.common import ( + get_compression_method, + get_filepath_or_buffer, + get_handle, + stringify_path, + validate_header_arg, +) from pandas.io.date_converters import generic_parser # BOM character (byte order mark) @@ -428,17 +434,16 @@ def _validate_names(names): def _read(filepath_or_buffer: FilePathOrBuffer, kwds): """Generic reader of line files.""" - encoding = kwds.get("encoding", None) storage_options = kwds.get("storage_options", None) - if encoding is not None: - encoding = re.sub("_", "-", encoding).lower() - kwds["encoding"] = encoding - compression = kwds.get("compression", "infer") ioargs = get_filepath_or_buffer( - filepath_or_buffer, encoding, compression, storage_options=storage_options + filepath_or_buffer, + kwds.get("encoding", None), + kwds.get("compression", "infer"), + storage_options=storage_options, ) kwds["compression"] = ioargs.compression + kwds["encoding"] = ioargs.encoding if kwds.get("date_parser", None) is not None: if isinstance(kwds["parse_dates"], bool): @@ -461,14 +466,10 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds): try: data = parser.read(nrows) finally: + # close compression and byte/text wrapper parser.close() - - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - try: - ioargs.filepath_or_buffer.close() - except ValueError: - pass + # close any fsspec-like objects + ioargs.close() return data @@ -1350,10 +1351,6 @@ def __init__(self, kwds): self._first_chunk = True - # GH 13932 - # keep references to file handles opened by the parser itself - self.handles = [] - def _validate_parse_dates_presence(self, columns: List[str]) -> None: """ Check if parse_dates are in columns. @@ -1403,8 +1400,7 @@ def _validate_parse_dates_presence(self, columns: List[str]) -> None: ) def close(self): - for f in self.handles: - f.close() + self.handles.close() @property def _has_complex_date_col(self): @@ -1838,23 +1834,29 @@ def __init__(self, src, **kwds): ParserBase.__init__(self, kwds) - encoding = kwds.get("encoding") + if kwds.get("memory_map", False): + # memory-mapped files are directly handled by the TextReader. 
+            src = stringify_path(src)
 
-        # parsers.TextReader doesn't support compression dicts
-        if isinstance(kwds.get("compression"), dict):
-            kwds["compression"] = kwds["compression"]["method"]
-
-        if kwds.get("compression") is None and encoding:
-            if isinstance(src, str):
-                src = open(src, "rb")
-                self.handles.append(src)
-
-        # Handle the file object with universal line mode enabled.
-        # We will handle the newline character ourselves later on.
-        if hasattr(src, "read") and not hasattr(src, "encoding"):
-            src = TextIOWrapper(src, encoding=encoding, newline="")
+            if get_compression_method(kwds.get("compression", None))[0] is not None:
+                raise ValueError(
+                    "read_csv does not support compression with memory_map=True. "
+                    + "Please use memory_map=False instead."
+                )
 
-            kwds["encoding"] = "utf-8"
+        self.handles = get_handle(
+            src,
+            mode="r",
+            encoding=kwds.get("encoding", None),
+            compression=kwds.get("compression", None),
+            memory_map=kwds.get("memory_map", False),
+            is_text=True,
+        )
+        # unwrap the raw mmap for the C reader before the kwarg is dropped below
+        if kwds.get("memory_map", False) and hasattr(self.handles.handle, "mmap"):
+            self.handles.handle = self.handles.handle.mmap
+        kwds.pop("encoding", None)
+        kwds.pop("memory_map", None)
+        kwds.pop("compression", None)
 
         # #2442
         kwds["allow_leading_cols"] = self.index_col is not False
@@ -1863,7 +1865,7 @@ def __init__(self, src, **kwds):
         self.usecols, self.usecols_dtype = _validate_usecols_arg(kwds["usecols"])
         kwds["usecols"] = self.usecols
 
-        self._reader = parsers.TextReader(src, **kwds)
+        self._reader = parsers.TextReader(self.handles.handle, **kwds)
         self.unnamed_cols = self._reader.unnamed_cols
 
         passed_names = self.names is None
@@ -1942,11 +1944,10 @@ def __init__(self, src, **kwds):
 
         self._implicit_index = self._reader.leading_cols > 0
 
-    def close(self):
-        for f in self.handles:
-            f.close()
+    def close(self) -> None:
+        super().close()
 
-        # close additional handles opened by C parser (for compression)
+        # close additional handles opened by C parser
        try:
             self._reader.close()
         except ValueError:
@@ -2237,20 +2238,19 @@ def __init__(self, f, **kwds):
         self.comment = kwds["comment"]
         self._comment_lines = []
 
-        f, handles = get_handle(
+        self.handles = get_handle(
             f,
             "r",
             encoding=self.encoding,
             compression=self.compression,
             memory_map=self.memory_map,
         )
-        self.handles.extend(handles)
 
         # Set self.data to something that can read lines.
-        if hasattr(f, "readline"):
-            self._make_reader(f)
+        if hasattr(self.handles.handle, "readline"):
+            self._make_reader(self.handles.handle)
         else:
-            self.data = f
+            self.data = self.handles.handle
 
         # Get columns in two steps: infer from data, then
         # infer column indices from self.usecols if it is specified.
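
Taken together, the parser changes above follow the same two-step open/close
protocol as the rest of this series: get_filepath_or_buffer resolves the
target into an IOArgs, get_handle layers compression and the byte/text
wrapper into an IOHandles, and each close() releases only what that step
created. A minimal sketch of the protocol using these private helpers as they
exist at this commit (the file name "data.csv.gz" and the written content are
made-up placeholders, not part of any patch):

    from pandas.io.common import get_filepath_or_buffer, get_handle

    # step 1: resolve URLs/fsspec paths and normalize compression -> IOArgs
    ioargs = get_filepath_or_buffer("data.csv.gz", compression="infer", mode="w")
    # step 2: apply compression and the byte/text wrapper -> IOHandles
    handles = get_handle(
        ioargs.filepath_or_buffer, ioargs.mode, compression=ioargs.compression
    )
    try:
        handles.handle.write("a,b\n1,2\n")
    finally:
        # closes only the buffers get_handle created; a wrapped user-provided
        # buffer is flushed and detached instead of closed
        handles.close()
        # closes fsspec-like objects when should_close is True
        ioargs.close()
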
diff --git a/pandas/io/pickle.py b/pandas/io/pickle.py index 426a40a65b522..6fa044b4651a5 100644 --- a/pandas/io/pickle.py +++ b/pandas/io/pickle.py @@ -92,25 +92,18 @@ def to_pickle( mode="wb", storage_options=storage_options, ) - f, fh = get_handle( + handles = get_handle( ioargs.filepath_or_buffer, "wb", compression=ioargs.compression, is_text=False ) if protocol < 0: protocol = pickle.HIGHEST_PROTOCOL try: - pickle.dump(obj, f, protocol=protocol) + pickle.dump(obj, handles.handle, protocol=protocol) # type: ignore[arg-type] finally: - if f != filepath_or_buffer: - # do not close user-provided file objects GH 35679 - f.close() - for _f in fh: - _f.close() - if ioargs.should_close: - assert not isinstance(ioargs.filepath_or_buffer, str) - try: - ioargs.filepath_or_buffer.close() - except ValueError: - pass + # close compression and byte/text wrapper + handles.close() + # close any fsspec-like objects + ioargs.close() def read_pickle( @@ -193,7 +186,7 @@ def read_pickle( ioargs = get_filepath_or_buffer( filepath_or_buffer, compression=compression, storage_options=storage_options ) - f, fh = get_handle( + handles = get_handle( ioargs.filepath_or_buffer, "rb", compression=ioargs.compression, is_text=False ) @@ -208,24 +201,17 @@ def read_pickle( with warnings.catch_warnings(record=True): # We want to silence any warnings about, e.g. moved modules. warnings.simplefilter("ignore", Warning) - return pickle.load(f) + return pickle.load(handles.handle) # type: ignore[arg-type] except excs_to_catch: # e.g. # "No module named 'pandas.core.sparse.series'" # "Can't get attribute '__nat_unpickle' on None: def close(self) -> None: """ close the handle if its open """ - try: - self.path_or_buf.close() - except OSError: - pass + self.ioargs.close() def _set_encoding(self) -> None: """ @@ -1938,7 +1936,7 @@ def _open_file_binary_write( fname: FilePathOrBuffer, compression: CompressionOptions, storage_options: StorageOptions = None, -) -> Tuple[BinaryIO, bool, CompressionOptions]: +) -> Tuple[IOHandles, CompressionOptions]: """ Open a binary file or no-op if file-like. @@ -1958,34 +1956,22 @@ def _open_file_binary_write( docs for the set of allowed keys and values .. 
versionadded:: 1.2.0 - - Returns - ------- - file : file-like object - File object supporting write - own : bool - True if the file was created, otherwise False """ - if hasattr(fname, "write"): - # See https://github.com/python/mypy/issues/1424 for hasattr challenges - # error: Incompatible return value type (got "Tuple[Union[str, Path, - # IO[Any]], bool, None]", expected "Tuple[BinaryIO, bool, Union[str, - # Mapping[str, str], None]]") - return fname, False, None # type: ignore[return-value] - elif isinstance(fname, (str, Path)): - # Extract compression mode as given, if dict - ioargs = get_filepath_or_buffer( - fname, mode="wb", compression=compression, storage_options=storage_options - ) - f, _ = get_handle( - ioargs.filepath_or_buffer, - "wb", - compression=ioargs.compression, - is_text=False, - ) - return f, True, ioargs.compression - else: - raise TypeError("fname must be a binary file, buffer or path-like.") + ioargs = get_filepath_or_buffer( + fname, mode="wb", compression=compression, storage_options=storage_options + ) + handles = get_handle( + ioargs.filepath_or_buffer, + "wb", + compression=ioargs.compression, + is_text=False, + ) + if ioargs.filepath_or_buffer != fname and not isinstance( + ioargs.filepath_or_buffer, str + ): + # add handle created by get_filepath_or_buffer + handles.created_handles.append(ioargs.filepath_or_buffer) + return handles, ioargs.compression def _set_endianness(endianness: str) -> str: @@ -2236,9 +2222,8 @@ def __init__( self._time_stamp = time_stamp self._data_label = data_label self._variable_labels = variable_labels - self._own_file = True self._compression = compression - self._output_file: Optional[BinaryIO] = None + self._output_file: Optional[Buffer] = None # attach nobs, nvars, data, varlist, typlist self._prepare_pandas(data) self.storage_options = storage_options @@ -2249,21 +2234,20 @@ def __init__( self._fname = stringify_path(fname) self.type_converters = {253: np.int32, 252: np.int16, 251: np.int8} self._converted_names: Dict[Label, str] = {} - self._file: Optional[BinaryIO] = None def _write(self, to_write: str) -> None: """ Helper to call encode before writing to file for Python 3 compat. """ - assert self._file is not None - self._file.write(to_write.encode(self._encoding)) + self.handles.handle.write( + to_write.encode(self._encoding) # type: ignore[arg-type] + ) def _write_bytes(self, value: bytes) -> None: """ Helper to assert file is open before writing. """ - assert self._file is not None - self._file.write(value) + self.handles.handle.write(value) # type: ignore[arg-type] def _prepare_categoricals(self, data: DataFrame) -> DataFrame: """ @@ -2527,12 +2511,14 @@ def _encode_strings(self) -> None: self.data[col] = encoded def write_file(self) -> None: - self._file, self._own_file, compression = _open_file_binary_write( + self.handles, compression = _open_file_binary_write( self._fname, self._compression, storage_options=self.storage_options ) if compression is not None: - self._output_file = self._file - self._file = BytesIO() + # ZipFile creates a file (with the same name) for each write call. + # Write it first into a buffer and then write the buffer to the ZipFile. 
+ self._output_file = self.handles.handle + self.handles.handle = BytesIO() try: self._write_header(data_label=self._data_label, time_stamp=self._time_stamp) self._write_map() @@ -2552,10 +2538,9 @@ def write_file(self) -> None: self._write_map() except Exception as exc: self._close() - if self._own_file: + if isinstance(self._fname, (str, Path)): try: - if isinstance(self._fname, (str, Path)): - os.unlink(self._fname) + os.unlink(self._fname) except OSError: warnings.warn( f"This save was not successful but {self._fname} could not " @@ -2571,24 +2556,18 @@ def _close(self) -> None: Close the file if it was created by the writer. If a buffer or file-like object was passed in, for example a GzipFile, - then leave this file open for the caller to close. In either case, - attempt to flush the file contents to ensure they are written to disk - (if supported) + then leave this file open for the caller to close. """ - # Some file-like objects might not support flush - assert self._file is not None + # write compression if self._output_file is not None: - assert isinstance(self._file, BytesIO) - bio = self._file + assert isinstance(self.handles.handle, BytesIO) + bio = self.handles.handle bio.seek(0) - self._file = self._output_file - self._file.write(bio.read()) - try: - self._file.flush() - except AttributeError: - pass - if self._own_file: - self._file.close() + self.handles.handle = self._output_file + self.handles.handle.write(bio.read()) # type: ignore[arg-type] + bio.close() + # close any created handles + self.handles.close() def _write_map(self) -> None: """No-op, future compatibility""" @@ -3140,8 +3119,8 @@ def _tag(val: Union[str, bytes], tag: str) -> bytes: def _update_map(self, tag: str) -> None: """Update map location for tag with file position""" - assert self._file is not None - self._map[tag] = self._file.tell() + assert self.handles.handle is not None + self._map[tag] = self.handles.handle.tell() def _write_header( self, @@ -3208,12 +3187,11 @@ def _write_map(self) -> None: the map with 0s. The second call writes the final map locations when all blocks have been written. 
""" - assert self._file is not None if not self._map: self._map = dict( ( ("stata_data", 0), - ("map", self._file.tell()), + ("map", self.handles.handle.tell()), ("variable_types", 0), ("varnames", 0), ("sortlist", 0), @@ -3229,7 +3207,7 @@ def _write_map(self) -> None: ) ) # Move to start of map - self._file.seek(self._map["map"]) + self.handles.handle.seek(self._map["map"]) bio = BytesIO() for val in self._map.values(): bio.write(struct.pack(self._byteorder + "Q", val)) diff --git a/pandas/tests/frame/methods/test_to_csv.py b/pandas/tests/frame/methods/test_to_csv.py index 5bf1ce508dfc4..3103f6e1ba0b1 100644 --- a/pandas/tests/frame/methods/test_to_csv.py +++ b/pandas/tests/frame/methods/test_to_csv.py @@ -1034,11 +1034,12 @@ def test_to_csv_compression(self, df, encoding, compression): tm.assert_frame_equal(df, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + handles = get_handle( filename, "w", compression=compression, encoding=encoding ) - with f: - df.to_csv(f, encoding=encoding) + df.to_csv(handles.handle, encoding=encoding) + assert not handles.handle.closed + handles.close() result = pd.read_csv( filename, compression=compression, diff --git a/pandas/tests/io/json/test_readlines.py b/pandas/tests/io/json/test_readlines.py index 933bdc462e3f8..2e68d3306c7d1 100644 --- a/pandas/tests/io/json/test_readlines.py +++ b/pandas/tests/io/json/test_readlines.py @@ -143,7 +143,7 @@ def test_readjson_chunks_closes(chunksize): ) reader.read() assert ( - reader.open_stream.closed + reader.handles.handle.closed ), f"didn't close stream with chunksize = {chunksize}" diff --git a/pandas/tests/io/parser/test_common.py b/pandas/tests/io/parser/test_common.py index b33289213e258..e61a5fce99c69 100644 --- a/pandas/tests/io/parser/test_common.py +++ b/pandas/tests/io/parser/test_common.py @@ -6,7 +6,7 @@ import csv from datetime import datetime from inspect import signature -from io import StringIO +from io import BytesIO, StringIO import os import platform from urllib.error import URLError @@ -2253,3 +2253,62 @@ def test_dict_keys_as_names(all_parsers): result = parser.read_csv(StringIO(data), names=keys) expected = DataFrame({"a": [1], "b": [2]}) tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("io_class", [StringIO, BytesIO]) +@pytest.mark.parametrize("encoding", [None, "utf-8"]) +def test_read_csv_file_handle(all_parsers, io_class, encoding): + """ + Test whether read_csv does not close user-provided file handles. + + GH 36980 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + content = "a,b\n1,2" + if io_class == BytesIO: + content = content.encode("utf-8") + handle = io_class(content) + + tm.assert_frame_equal(parser.read_csv(handle, encoding=encoding), expected) + assert not handle.closed + + +def test_memory_map_compression_error(c_parser_only): + """ + c-parsers do not support memory_map=True with compression. + + GH 36997 + """ + parser = c_parser_only + df = DataFrame({"a": [1], "b": [2]}) + msg = ( + "read_csv does not support compression with memory_map=True. " + + "Please use memory_map=False instead." + ) + + with tm.ensure_clean() as path: + df.to_csv(path, compression="gzip", index=False) + + with pytest.raises(ValueError, match=msg): + parser.read_csv(path, memory_map=True, compression="gzip") + + +def test_memory_map_file_handle(all_parsers): + """ + Support some buffers with memory_map=True. 
+ + GH 36997 + """ + parser = all_parsers + expected = DataFrame({"a": [1], "b": [2]}) + + handle = StringIO() + expected.to_csv(handle, index=False) + handle.seek(0) + + tm.assert_frame_equal( + parser.read_csv(handle, memory_map=True), + expected, + ) diff --git a/pandas/tests/io/parser/test_encoding.py b/pandas/tests/io/parser/test_encoding.py index 876696ecdad9c..e74265da3e966 100644 --- a/pandas/tests/io/parser/test_encoding.py +++ b/pandas/tests/io/parser/test_encoding.py @@ -152,14 +152,17 @@ def test_binary_mode_file_buffers( with open(fpath, mode="r", encoding=encoding) as fa: result = parser.read_csv(fa) + assert not fa.closed tm.assert_frame_equal(expected, result) with open(fpath, mode="rb") as fb: result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed tm.assert_frame_equal(expected, result) with open(fpath, mode="rb", buffering=0) as fb: result = parser.read_csv(fb, encoding=encoding) + assert not fb.closed tm.assert_frame_equal(expected, result) @@ -199,6 +202,7 @@ def test_encoding_named_temp_file(all_parsers): result = parser.read_csv(f, encoding=encoding) tm.assert_frame_equal(result, expected) + assert not f.closed @pytest.mark.parametrize( diff --git a/pandas/tests/io/parser/test_textreader.py b/pandas/tests/io/parser/test_textreader.py index 1c2518646bb29..413b78a52ad38 100644 --- a/pandas/tests/io/parser/test_textreader.py +++ b/pandas/tests/io/parser/test_textreader.py @@ -31,13 +31,10 @@ def test_file_handle(self): reader = TextReader(f) reader.read() - def test_string_filename(self): - reader = TextReader(self.csv1, header=None) - reader.read() - def test_file_handle_mmap(self): + # this was never using memory_map=True with open(self.csv1, "rb") as f: - reader = TextReader(f, memory_map=True, header=None) + reader = TextReader(f, header=None) reader.read() def test_StringIO(self): diff --git a/pandas/tests/io/test_compression.py b/pandas/tests/io/test_compression.py index 31e9ad4cf4416..8d7d5d85cbb48 100644 --- a/pandas/tests/io/test_compression.py +++ b/pandas/tests/io/test_compression.py @@ -47,18 +47,18 @@ def test_compression_size(obj, method, compression_only): @pytest.mark.parametrize("method", ["to_csv", "to_json"]) def test_compression_size_fh(obj, method, compression_only): with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed + handles = icom.get_handle(path, "w", compression=compression_only) + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + handles.close() + assert handles.handle.closed compressed_size = os.path.getsize(path) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=None) - with f: - getattr(obj, method)(f) - assert not f.closed - assert f.closed + handles = icom.get_handle(path, "w", compression=None) + getattr(obj, method)(handles.handle) + assert not handles.handle.closed + handles.close() + assert handles.handle.closed uncompressed_size = os.path.getsize(path) assert uncompressed_size > compressed_size @@ -111,10 +111,10 @@ def test_compression_warning(compression_only): columns=["X", "Y", "Z"], ) with tm.ensure_clean() as path: - f, handles = icom.get_handle(path, "w", compression=compression_only) + handles = icom.get_handle(path, "w", compression=compression_only) with tm.assert_produces_warning(RuntimeWarning, check_stacklevel=False): - with f: - df.to_csv(f, compression=compression_only) + df.to_csv(handles.handle, 
compression=compression_only) + handles.close() def test_compression_binary(compression_only): diff --git a/pandas/tests/series/methods/test_to_csv.py b/pandas/tests/series/methods/test_to_csv.py index a72e860340f25..714173158f4d6 100644 --- a/pandas/tests/series/methods/test_to_csv.py +++ b/pandas/tests/series/methods/test_to_csv.py @@ -143,11 +143,11 @@ def test_to_csv_compression(self, s, encoding, compression): tm.assert_series_equal(s, result) # test the round trip using file handle - to_csv -> read_csv - f, _handles = get_handle( + handles = get_handle( filename, "w", compression=compression, encoding=encoding ) - with f: - s.to_csv(f, encoding=encoding, header=True) + s.to_csv(handles.handle, encoding=encoding, header=True) + handles.close() result = pd.read_csv( filename, compression=compression, From ff1cd78535f1badc74061c36700ea005193a8461 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 11:01:25 +0000 Subject: [PATCH 17/21] more typing checks to pre-commit (#37539) --- .pre-commit-config.yaml | 30 +++++++++++++++++++++++++++ ci/code_checks.sh | 23 -------------------- scripts/validate_unwanted_patterns.py | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b0f35087dc922..0c1e4e330c903 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -119,6 +119,36 @@ repos: entry: python scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" types: [python] exclude: ^(asv_bench|pandas/tests|doc)/ + - id: FrameOrSeriesUnion + name: Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias + entry: Union\[.*(Series.*DataFrame|DataFrame.*Series).*\] + language: pygrep + types: [python] + exclude: ^pandas/_typing\.py$ + - id: type-not-class + name: Check for use of foo.__class__ instead of type(foo) + entry: \.__class__ + language: pygrep + files: \.(py|pyx)$ + - id: unwanted-typing + name: Check for use of comment-based annotation syntax and missing error codes + entry: | + (?x) + \#\ type:\ (?!ignore)| + \#\ type:\s?ignore(?!\[) + language: pygrep + types: [python] + - id: no-os-remove + name: Check code for instances of os.remove + entry: os\.remove + language: pygrep + types: [python] + files: ^pandas/tests/ + exclude: | + (?x)^ + pandas/tests/io/excel/test_writers\.py| + pandas/tests/io/pytables/common\.py| + pandas/tests/io/pytables/test_store\.py$ - repo: https://github.com/asottile/yesqa rev: v1.2.2 hooks: diff --git a/ci/code_checks.sh b/ci/code_checks.sh index 7c48905135f89..b5d63e259456b 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -122,29 +122,6 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then RET=$(($RET + $?)) ; echo $MSG "DONE" # ------------------------------------------------------------------------- - # Type annotations - - MSG='Check for use of comment-based annotation syntax' ; echo $MSG - invgrep -R --include="*.py" -P '# type: (?!ignore)' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for missing error codes with # type: ignore' ; echo $MSG - invgrep -R --include="*.py" -P '# type:\s?ignore(?!\[)' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - MSG='Check for use of Union[Series, DataFrame] instead of FrameOrSeriesUnion alias' ; echo $MSG - invgrep -R --include="*.py" --exclude=_typing.py -E 'Union\[.*(Series.*DataFrame|DataFrame.*Series).*\]' pandas - RET=$(($RET + $?)) ; echo $MSG "DONE" - - # ------------------------------------------------------------------------- - 
MSG='Check for use of foo.__class__ instead of type(foo)' ; echo $MSG
-    invgrep -R --include=*.{py,pyx} '\.__class__' pandas
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
-    MSG='Check code for instances of os.remove' ; echo $MSG
-    invgrep -R --include="*.py*" --exclude "common.py" --exclude "test_writers.py" --exclude "test_store.py" -E "os\.remove" pandas/tests/
-    RET=$(($RET + $?)) ; echo $MSG "DONE"
-
     MSG='Check for inconsistent use of pandas namespace in tests' ; echo $MSG
     for class in "Series" "DataFrame" "Index" "MultiIndex" "Timestamp" "Timedelta" "TimedeltaIndex" "DatetimeIndex" "Categorical"; do
         check_namespace ${class}
diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
index 7b648a589bc61..9c58a55cb907e 100755
--- a/scripts/validate_unwanted_patterns.py
+++ b/scripts/validate_unwanted_patterns.py
@@ -474,7 +474,7 @@ def main(
     sys.exit(
         main(
-            function=globals().get(args.validation_type),  # type: ignore
+            function=globals().get(args.validation_type),
             source_path=args.paths,
             output_format=args.format,
         )

From 15f843ab102d7a0cd7f1c7870dfec72d0e28d252 Mon Sep 17 00:00:00 2001
From: Maxim Ivanov <41443370+ivanovmg@users.noreply.github.com>
Date: Wed, 4 Nov 2020 18:04:32 +0700
Subject: [PATCH 18/21] TST: 32bit dtype compat test_groupby_dropna (#37623)

---
 pandas/tests/groupby/test_groupby_dropna.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 02ce4dcf2ae2b..e38fa5e8de87e 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -343,7 +343,7 @@ def test_groupby_nan_included():
     df = pd.DataFrame(data)
     grouped = df.groupby("group", dropna=False)
     result = grouped.indices
-    dtype = "int64"
+    dtype = np.intp
     expected = {
         "g1": np.array([0, 2], dtype=dtype),
         "g2": np.array([3], dtype=dtype),

From cc9c646463d4a93abdc7c61bbb47e7d2ccf2fc4b Mon Sep 17 00:00:00 2001
From: Janus
Date: Wed, 4 Nov 2020 14:22:54 +0100
Subject: [PATCH 19/21] BUG: Metadata propagation for groupby iterator (#37461)

---
 doc/source/whatsnew/v1.1.5.rst                |  2 +-
 pandas/core/groupby/ops.py                    | 15 ++++++++++++---
 pandas/tests/groupby/test_groupby_subclass.py |  9 +++++++++
 3 files changed, 22 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.5.rst b/doc/source/whatsnew/v1.1.5.rst
index cf728d94b2a55..a122154904996 100644
--- a/doc/source/whatsnew/v1.1.5.rst
+++ b/doc/source/whatsnew/v1.1.5.rst
@@ -23,7 +23,7 @@ Fixed regressions
 
 Bug fixes
 ~~~~~~~~~
--
+- Bug in metadata propagation for ``groupby`` iterator (:issue:`37343`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ccf23a6f24c42..f807b740abaf2 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -140,9 +140,16 @@ def get_iterator(
         splitter = self._get_splitter(data, axis=axis)
         keys = self._get_group_keys()
         for key, (i, group) in zip(keys, splitter):
-            yield key, group
+            yield key, group.__finalize__(data, method="groupby")
 
     def _get_splitter(self, data: FrameOrSeries, axis: int = 0) -> "DataSplitter":
+        """
+        Returns
+        -------
+        Generator yielding subsetted objects
+
+        __finalize__ has not been called for the subsetted objects returned.
+ """ comp_ids, _, ngroups = self.group_info return get_splitter(data, comp_ids, ngroups, axis=axis) @@ -918,7 +925,8 @@ class SeriesSplitter(DataSplitter): def _chop(self, sdata: Series, slice_obj: slice) -> Series: # fastpath equivalent to `sdata.iloc[slice_obj]` mgr = sdata._mgr.get_slice(slice_obj) - return type(sdata)(mgr, name=sdata.name, fastpath=True) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr, name=sdata.name, fastpath=True) class FrameSplitter(DataSplitter): @@ -934,7 +942,8 @@ def _chop(self, sdata: DataFrame, slice_obj: slice) -> DataFrame: # else: # return sdata.iloc[:, slice_obj] mgr = sdata._mgr.get_slice(slice_obj, axis=1 - self.axis) - return type(sdata)(mgr) + # __finalize__ not called here, must be applied by caller if applicable + return sdata._constructor(mgr) def get_splitter( diff --git a/pandas/tests/groupby/test_groupby_subclass.py b/pandas/tests/groupby/test_groupby_subclass.py index cc7a79e976513..d268d87708552 100644 --- a/pandas/tests/groupby/test_groupby_subclass.py +++ b/pandas/tests/groupby/test_groupby_subclass.py @@ -51,6 +51,15 @@ def test_groupby_preserves_subclass(obj, groupby_func): tm.assert_series_equal(result1, result2) +def test_groupby_preserves_metadata(): + # GH-37343 + custom_df = tm.SubclassedDataFrame({"a": [1, 2, 3], "b": [1, 1, 2], "c": [7, 8, 9]}) + assert "testattr" in custom_df._metadata + custom_df.testattr = "hello" + for _, group_df in custom_df.groupby("c"): + assert group_df.testattr == "hello" + + @pytest.mark.parametrize("obj", [DataFrame, tm.SubclassedDataFrame]) def test_groupby_resample_preserves_subclass(obj): # GH28330 -- preserve subclass through groupby.resample() From 1c6cd01a4f3ba0e8f4dc2fccc64c216f577b5eca Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Wed, 4 Nov 2020 05:43:46 -0800 Subject: [PATCH 20/21] BUG: read-only values in cython funcs (#37613) --- doc/source/whatsnew/v1.2.0.rst | 2 ++ pandas/_libs/join.pyx | 2 +- pandas/_libs/tslibs/strptime.pyx | 4 ++-- pandas/_libs/tslibs/timedeltas.pyx | 2 +- pandas/core/arrays/datetimelike.py | 3 +-- pandas/tests/libs/test_join.py | 7 ++++++- pandas/tests/tools/test_to_datetime.py | 10 ++++++++++ pandas/tests/tools/test_to_timedelta.py | 10 ++++++++++ 8 files changed, 33 insertions(+), 7 deletions(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 33e9bd0c2732a..2e976371c0ac8 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -399,11 +399,13 @@ Datetimelike - Bug in :meth:`TimedeltaIndex.sum` and :meth:`Series.sum` with ``timedelta64`` dtype on an empty index or series returning ``NaT`` instead of ``Timedelta(0)`` (:issue:`31751`) - Bug in :meth:`DatetimeArray.shift` incorrectly allowing ``fill_value`` with a mismatched timezone (:issue:`37299`) - Bug in adding a :class:`BusinessDay` with nonzero ``offset`` to a non-scalar other (:issue:`37457`) +- Bug in :func:`to_datetime` with a read-only array incorrectly raising (:issue:`34857`) Timedelta ^^^^^^^^^ - Bug in :class:`TimedeltaIndex`, :class:`Series`, and :class:`DataFrame` floor-division with ``timedelta64`` dtypes and ``NaT`` in the denominator (:issue:`35529`) - Bug in parsing of ISO 8601 durations in :class:`Timedelta`, :meth:`pd.to_datetime` (:issue:`37159`, fixes :issue:`29773` and :issue:`36204`) +- Bug in :func:`to_timedelta` with a read-only array incorrectly raising (:issue:`34857`) Timezones ^^^^^^^^^ diff --git a/pandas/_libs/join.pyx b/pandas/_libs/join.pyx index 
13c7187923473..1b79d68c13570 100644 --- a/pandas/_libs/join.pyx +++ b/pandas/_libs/join.pyx @@ -268,7 +268,7 @@ ctypedef fused join_t: @cython.wraparound(False) @cython.boundscheck(False) -def left_join_indexer_unique(join_t[:] left, join_t[:] right): +def left_join_indexer_unique(ndarray[join_t] left, ndarray[join_t] right): cdef: Py_ssize_t i, j, nleft, nright ndarray[int64_t] indexer diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index d2690be905a68..bc4632ad028ab 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -12,7 +12,7 @@ from _thread import allocate_lock as _thread_allocate_lock import numpy as np import pytz -from numpy cimport int64_t +from numpy cimport int64_t, ndarray from pandas._libs.tslibs.nattype cimport ( NPY_NAT, @@ -51,7 +51,7 @@ cdef dict _parse_code_table = {'y': 0, 'u': 22} -def array_strptime(object[:] values, object fmt, bint exact=True, errors='raise'): +def array_strptime(ndarray[object] values, object fmt, bint exact=True, errors='raise'): """ Calculates the datetime structs represented by the passed array of strings diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index 45f32d92c7a74..29e8c58055f9e 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -227,7 +227,7 @@ cdef convert_to_timedelta64(object ts, str unit): @cython.boundscheck(False) @cython.wraparound(False) -def array_to_timedelta64(object[:] values, str unit=None, str errors="raise"): +def array_to_timedelta64(ndarray[object] values, str unit=None, str errors="raise"): """ Convert an ndarray to an array of timedeltas. If errors == 'coerce', coerce non-convertible objects to NaT. Otherwise, raise. diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 1955a96160a4a..e845dbf39dbc9 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1025,9 +1025,8 @@ def _addsub_object_array(self, other: np.ndarray, op): result : same class as self """ assert op in [operator.add, operator.sub] - if len(other) == 1: + if len(other) == 1 and self.ndim == 1: # If both 1D then broadcasting is unambiguous - # TODO(EA2D): require self.ndim == other.ndim here return op(self, other[0]) warnings.warn( diff --git a/pandas/tests/libs/test_join.py b/pandas/tests/libs/test_join.py index 95d6dcbaf3baf..f3f09d7a42204 100644 --- a/pandas/tests/libs/test_join.py +++ b/pandas/tests/libs/test_join.py @@ -135,9 +135,14 @@ def test_cython_inner_join(self): tm.assert_numpy_array_equal(rs, exp_rs, check_dtype=False) -def test_left_join_indexer_unique(): +@pytest.mark.parametrize("readonly", [True, False]) +def test_left_join_indexer_unique(readonly): a = np.array([1, 2, 3, 4, 5], dtype=np.int64) b = np.array([2, 2, 3, 4, 4], dtype=np.int64) + if readonly: + # GH#37312, GH#37264 + a.setflags(write=False) + b.setflags(write=False) result = libjoin.left_join_indexer_unique(b, a) expected = np.array([1, 1, 2, 3, 3], dtype=np.int64) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index ebe118252c8cf..10bda16655586 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -36,6 +36,16 @@ class TestTimeConversionFormats: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_datetime_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_datetime(arr) + 
expected = to_datetime([]) + tm.assert_index_equal(result, expected) + @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_format(self, cache): values = ["1/1/2000", "1/2/2000", "1/3/2000"] diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 8e48295c533cc..5be7e81df53f2 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -9,6 +9,16 @@ class TestTimedeltas: + @pytest.mark.parametrize("readonly", [True, False]) + def test_to_timedelta_readonly(self, readonly): + # GH#34857 + arr = np.array([], dtype=object) + if readonly: + arr.setflags(write=False) + result = to_timedelta(arr) + expected = to_timedelta([]) + tm.assert_index_equal(result, expected) + def test_to_timedelta(self): result = to_timedelta(["", ""]) From a0571352b1ecf3b93dd0badbd02f873bebf906e0 Mon Sep 17 00:00:00 2001 From: Marco Gorelli Date: Wed, 4 Nov 2020 13:47:36 +0000 Subject: [PATCH 21/21] CLN refactor core/arrays (#37581) --- pandas/core/arrays/base.py | 9 ++++----- pandas/core/arrays/boolean.py | 20 +++++++++---------- pandas/core/arrays/categorical.py | 8 +++----- pandas/core/arrays/datetimelike.py | 6 ++---- pandas/core/arrays/masked.py | 7 ++++--- pandas/core/arrays/numpy_.py | 6 ++---- pandas/core/arrays/period.py | 6 ++---- pandas/core/arrays/sparse/array.py | 32 ++++++++++-------------------- pandas/core/arrays/timedeltas.py | 6 ++---- 9 files changed, 40 insertions(+), 60 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 57f8f11d4d04c..82d79cc47a4ae 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -460,7 +460,7 @@ def astype(self, dtype, copy=True): if is_dtype_equal(dtype, self.dtype): if not copy: return self - elif copy: + else: return self.copy() if isinstance(dtype, StringDtype): # allow conversion to StringArrays return dtype.construct_array_type()._from_sequence(self, copy=False) @@ -544,14 +544,13 @@ def argsort( ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs) values = self._values_for_argsort() - result = nargsort( + return nargsort( values, kind=kind, ascending=ascending, na_position=na_position, mask=np.asarray(self.isna()), ) - return result def argmin(self): """ @@ -780,12 +779,12 @@ def equals(self, other: object) -> bool: boolean Whether the arrays are equivalent. 
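
The `astype` cleanup in base.py is behavior-preserving: once `if not copy: return self` has fired, the only case left inside the dtype-equal branch is `copy=True`, so `elif copy:` collapses to `else:`. A small sketch of that fast path, under made-up names rather than the pandas internals:

    import numpy as np

    def astype_fast_path(arr: np.ndarray, dtype, copy: bool = True):
        # dtype already matches: hand back the object itself or a copy,
        # mirroring the simplified branch structure in the diff.
        if arr.dtype == np.dtype(dtype):
            if not copy:
                return arr
            return arr.copy()
        return arr.astype(dtype, copy=copy)

    a = np.arange(3, dtype="int64")
    assert astype_fast_path(a, "int64", copy=False) is a     # no copy taken
    assert astype_fast_path(a, "int64", copy=True) is not a  # fresh copy
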
""" - if not type(self) == type(other): + if type(self) != type(other): return False other = cast(ExtensionArray, other) if not is_dtype_equal(self.dtype, other.dtype): return False - elif not len(self) == len(other): + elif len(self) != len(other): return False else: equal_values = self == other diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py index 73aa97c832848..21306455573b8 100644 --- a/pandas/core/arrays/boolean.py +++ b/pandas/core/arrays/boolean.py @@ -170,12 +170,13 @@ def coerce_to_array( values[~mask_values] = values_object[~mask_values].astype(bool) # if the values were integer-like, validate it were actually 0/1's - if inferred_dtype in integer_like: - if not np.all( + if (inferred_dtype in integer_like) and not ( + np.all( values[~mask_values].astype(float) == values_object[~mask_values].astype(float) - ): - raise TypeError("Need to pass bool-like values") + ) + ): + raise TypeError("Need to pass bool-like values") if mask is None and mask_values is None: mask = np.zeros(len(values), dtype=bool) @@ -193,9 +194,9 @@ def coerce_to_array( if mask_values is not None: mask = mask | mask_values - if not values.ndim == 1: + if values.ndim != 1: raise ValueError("values must be a 1D list-like") - if not mask.ndim == 1: + if mask.ndim != 1: raise ValueError("mask must be a 1D list-like") return values, mask @@ -395,9 +396,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike: self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False ) # for integer, error if there are missing values - if is_integer_dtype(dtype): - if self._hasna: - raise ValueError("cannot convert NA to integer") + if is_integer_dtype(dtype) and self._hasna: + raise ValueError("cannot convert NA to integer") # for float dtype, ensure we use np.nan before casting (numpy cannot # deal with pd.NA) na_value = self._na_value @@ -576,7 +576,7 @@ def _logical_method(self, other, op): elif isinstance(other, np.bool_): other = other.item() - if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)): + if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other): raise TypeError( "'other' should be pandas.NA or a bool. " f"Got {type(other).__name__} instead." diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9f0414cf7a806..626fb495dec03 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1314,8 +1314,7 @@ def isna(self): Categorical.notna : Boolean inverse of Categorical.isna. 
""" - ret = self._codes == -1 - return ret + return self._codes == -1 isnull = isna @@ -1363,7 +1362,7 @@ def value_counts(self, dropna=True): from pandas import CategoricalIndex, Series code, cat = self._codes, self.categories - ncat, mask = len(cat), 0 <= code + ncat, mask = (len(cat), code >= 0) ix, clean = np.arange(ncat), mask.all() if dropna or clean: @@ -1920,8 +1919,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]: ) counts = counts.cumsum() _result = (r[start:end] for start, end in zip(counts, counts[1:])) - result = dict(zip(categories, _result)) - return result + return dict(zip(categories, _result)) # ------------------------------------------------------------------ # Reductions diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index e845dbf39dbc9..404511895ddf0 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1062,8 +1062,7 @@ def _time_shift(self, periods, freq=None): if isinstance(freq, str): freq = to_offset(freq) offset = periods * freq - result = self + offset - return result + return self + offset if periods == 0 or len(self) == 0: # GH#14811 empty case @@ -1533,10 +1532,9 @@ def _round(self, freq, mode, ambiguous, nonexistent): self = cast("DatetimeArray", self) naive = self.tz_localize(None) result = naive._round(freq, mode, ambiguous, nonexistent) - aware = result.tz_localize( + return result.tz_localize( self.tz, ambiguous=ambiguous, nonexistent=nonexistent ) - return aware values = self.view("i8") result = round_nsint64(values, mode, freq) diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 9febba0f544ac..b633f268049e5 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -84,9 +84,9 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False): "mask should be boolean numpy array. 
Use " "the 'pd.array' function instead" ) - if not values.ndim == 1: + if values.ndim != 1: raise ValueError("values must be a 1D array") - if not mask.ndim == 1: + if mask.ndim != 1: raise ValueError("mask must be a 1D array") if copy: @@ -209,7 +209,8 @@ def to_numpy( dtype = object if self._hasna: if ( - not (is_object_dtype(dtype) or is_string_dtype(dtype)) + not is_object_dtype(dtype) + and not is_string_dtype(dtype) and na_value is libmissing.NA ): raise ValueError( diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index cd48f6cbc8170..e1a424b719a4a 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -281,17 +281,15 @@ def all(self, *, axis=None, out=None, keepdims=False, skipna=True): def min(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_min((), kwargs) - result = masked_reductions.min( + return masked_reductions.min( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) - return result def max(self, *, skipna: bool = True, **kwargs) -> Scalar: nv.validate_max((), kwargs) - result = masked_reductions.max( + return masked_reductions.max( values=self.to_numpy(), mask=self.isna(), skipna=skipna ) - return result def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar: nv.validate_sum((), kwargs) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d808ade53ad33..8de84a0187e95 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -589,7 +589,7 @@ def astype(self, dtype, copy: bool = True): if is_dtype_equal(dtype, self._dtype): if not copy: return self - elif copy: + else: return self.copy() if is_period_dtype(dtype): return self.asfreq(dtype.freq) @@ -1080,11 +1080,9 @@ def _make_field_arrays(*fields): elif length is None: length = len(x) - arrays = [ + return [ np.asarray(x) if isinstance(x, (np.ndarray, list, ABCSeries)) else np.repeat(x, length) for x in fields ] - - return arrays diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 4346e02069667..5f4cd4b269a2a 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -316,9 +316,8 @@ def __init__( raise Exception("must only pass scalars with an index") if is_scalar(data): - if index is not None: - if data is None: - data = np.nan + if index is not None and data is None: + data = np.nan if index is not None: npoints = len(index) @@ -575,8 +574,7 @@ def density(self): >>> s.density 0.6 """ - r = float(self.sp_index.npoints) / float(self.sp_index.length) - return r + return float(self.sp_index.npoints) / float(self.sp_index.length) @property def npoints(self) -> int: @@ -736,25 +734,17 @@ def value_counts(self, dropna=True): keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna) fcounts = self.sp_index.ngaps - if fcounts > 0: - if self._null_fill_value and dropna: - pass + if fcounts > 0 and (not self._null_fill_value or not dropna): + mask = isna(keys) if self._null_fill_value else keys == self.fill_value + if mask.any(): + counts[mask] += fcounts else: - if self._null_fill_value: - mask = isna(keys) - else: - mask = keys == self.fill_value - - if mask.any(): - counts[mask] += fcounts - else: - keys = np.insert(keys, 0, self.fill_value) - counts = np.insert(counts, 0, fcounts) + keys = np.insert(keys, 0, self.fill_value) + counts = np.insert(counts, 0, fcounts) if not isinstance(keys, ABCIndexClass): keys = Index(keys) - result = Series(counts, index=keys) - return result + return Series(counts, 
index=keys) # -------- # Indexing @@ -1062,7 +1052,7 @@ def astype(self, dtype=None, copy=True): if is_dtype_equal(dtype, self._dtype): if not copy: return self - elif copy: + else: return self.copy() dtype = self.dtype.update_dtype(dtype) subtype = dtype._subtype_with_str diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index e4a844fd4c6ef..8a87df18b6adb 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -227,8 +227,7 @@ def _from_sequence( data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None) freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False) - result = cls._simple_new(data, freq=freq) - return result + return cls._simple_new(data, freq=freq) @classmethod def _from_sequence_not_strict( @@ -334,10 +333,9 @@ def astype(self, dtype, copy: bool = True): if self._hasnans: # avoid double-copying result = self._data.astype(dtype, copy=False) - values = self._maybe_mask_results( + return self._maybe_mask_results( result, fill_value=None, convert="float64" ) - return values result = self._data.astype(dtype, copy=copy) return result.astype("i8") elif is_timedelta64_ns_dtype(dtype):
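
The early return in the timedelta `astype` keeps the NaT handling intact: the data is viewed as int64 nanoseconds, but any slot holding NaT has to come back as NaN, which is why the masked result is float64. A plain-NumPy sketch of that masking step (not the `_maybe_mask_results` implementation itself):

    import numpy as np

    tds = np.array([1, 2, np.timedelta64("NaT")], dtype="m8[ns]")

    as_float = tds.view("i8").astype("float64")  # raw nanosecond counts
    as_float[np.isnat(tds)] = np.nan             # NaT sentinel -> NaN
    print(as_float)                              # [ 1.  2. nan]
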