From 03abbdc71e4137f0aa8f983fdc6c920b8a6787de Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.e.gorelli@gmail.com>
Date: Mon, 2 Nov 2020 11:35:38 +0000
Subject: [PATCH 1/2] refactor core-arrays: flatten nested ifs, return expressions directly, simplify negated comparisons

---
 pandas/core/arrays/base.py         | 12 +++++------
 pandas/core/arrays/boolean.py      | 22 ++++++++++----------
 pandas/core/arrays/categorical.py  |  8 +++-----
 pandas/core/arrays/datetimelike.py |  6 ++----
 pandas/core/arrays/masked.py       |  7 ++++---
 pandas/core/arrays/numpy_.py       |  6 ++----
 pandas/core/arrays/period.py       |  6 ++----
 pandas/core/arrays/sparse/array.py | 32 ++++++++++--------------------
 pandas/core/arrays/timedeltas.py   |  6 ++----
 9 files changed, 41 insertions(+), 64 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 57f8f11d4d04c..3216957e1f188 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -335,8 +335,7 @@ def __iter__(self):
         # This needs to be implemented so that pandas recognizes extension
         # arrays as list-like. The default implementation makes successive
         # calls to ``__getitem__``, which may be slower than necessary.
-        for i in range(len(self)):
-            yield self[i]
+        yield from self
 
     def __eq__(self, other: Any) -> ArrayLike:
         """
@@ -460,7 +459,7 @@ def astype(self, dtype, copy=True):
         if is_dtype_equal(dtype, self.dtype):
             if not copy:
                 return self
-            elif copy:
+            else:
                 return self.copy()
         if isinstance(dtype, StringDtype):  # allow conversion to StringArrays
             return dtype.construct_array_type()._from_sequence(self, copy=False)
@@ -544,14 +543,13 @@ def argsort(
         ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)
 
         values = self._values_for_argsort()
-        result = nargsort(
+        return nargsort(
             values,
             kind=kind,
             ascending=ascending,
             na_position=na_position,
             mask=np.asarray(self.isna()),
         )
-        return result
 
     def argmin(self):
         """
@@ -780,12 +778,12 @@ def equals(self, other: object) -> bool:
         boolean
             Whether the arrays are equivalent.
         """
-        if not type(self) == type(other):
+        if type(self) != type(other):
             return False
         other = cast(ExtensionArray, other)
         if not is_dtype_equal(self.dtype, other.dtype):
             return False
-        elif not len(self) == len(other):
+        elif len(self) != len(other):
             return False
         else:
             equal_values = self == other
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index 73aa97c832848..e992e7478017e 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -170,12 +170,11 @@ def coerce_to_array(
         values[~mask_values] = values_object[~mask_values].astype(bool)
 
         # if the values were integer-like, validate it were actually 0/1's
-        if inferred_dtype in integer_like:
-            if not np.all(
-                values[~mask_values].astype(float)
-                == values_object[~mask_values].astype(float)
-            ):
-                raise TypeError("Need to pass bool-like values")
+        if inferred_dtype in integer_like and not np.all(
+            values[~mask_values].astype(float)
+            == values_object[~mask_values].astype(float)
+        ):
+            raise TypeError("Need to pass bool-like values")
 
     if mask is None and mask_values is None:
         mask = np.zeros(len(values), dtype=bool)
@@ -193,9 +192,9 @@ def coerce_to_array(
             if mask_values is not None:
                 mask = mask | mask_values
 
-    if not values.ndim == 1:
+    if values.ndim != 1:
         raise ValueError("values must be a 1D list-like")
-    if not mask.ndim == 1:
+    if mask.ndim != 1:
         raise ValueError("mask must be a 1D list-like")
 
     return values, mask
@@ -395,9 +394,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
                 self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
             )
         # for integer, error if there are missing values
-        if is_integer_dtype(dtype):
-            if self._hasna:
-                raise ValueError("cannot convert NA to integer")
+        if is_integer_dtype(dtype) and self._hasna:
+            raise ValueError("cannot convert NA to integer")
         # for float dtype, ensure we use np.nan before casting (numpy cannot
         # deal with pd.NA)
         na_value = self._na_value
@@ -576,7 +574,7 @@ def _logical_method(self, other, op):
         elif isinstance(other, np.bool_):
             other = other.item()
 
-        if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
+        if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other):
             raise TypeError(
                 "'other' should be pandas.NA or a bool. "
                 f"Got {type(other).__name__} instead."
diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
index 03f66ff82ad75..f77cea73ef6c1 100644
--- a/pandas/core/arrays/categorical.py
+++ b/pandas/core/arrays/categorical.py
@@ -1319,8 +1319,7 @@ def isna(self):
         Categorical.notna : Boolean inverse of Categorical.isna.
 
         """
-        ret = self._codes == -1
-        return ret
+        return self._codes == -1
 
     isnull = isna
 
@@ -1368,7 +1367,7 @@ def value_counts(self, dropna=True):
         from pandas import CategoricalIndex, Series
 
         code, cat = self._codes, self.categories
-        ncat, mask = len(cat), 0 <= code
+        ncat, mask = (len(cat), code >= 0)
         ix, clean = np.arange(ncat), mask.all()
 
         if dropna or clean:
@@ -1930,8 +1929,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
         )
         counts = counts.cumsum()
         _result = (r[start:end] for start, end in zip(counts, counts[1:]))
-        result = dict(zip(categories, _result))
-        return result
+        return dict(zip(categories, _result))
 
     # ------------------------------------------------------------------
     # Reductions
diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py
index f8a609fb0cabe..d82399e00a2aa 100644
--- a/pandas/core/arrays/datetimelike.py
+++ b/pandas/core/arrays/datetimelike.py
@@ -1064,8 +1064,7 @@ def _time_shift(self, periods, freq=None):
             if isinstance(freq, str):
                 freq = to_offset(freq)
             offset = periods * freq
-            result = self + offset
-            return result
+            return self + offset
 
         if periods == 0 or len(self) == 0:
             # GH#14811 empty case
@@ -1535,10 +1534,9 @@ def _round(self, freq, mode, ambiguous, nonexistent):
             self = cast("DatetimeArray", self)
             naive = self.tz_localize(None)
             result = naive._round(freq, mode, ambiguous, nonexistent)
-            aware = result.tz_localize(
+            return result.tz_localize(
                 self.tz, ambiguous=ambiguous, nonexistent=nonexistent
             )
-            return aware
 
         values = self.view("i8")
         result = round_nsint64(values, mode, freq)
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py
index 9febba0f544ac..b633f268049e5 100644
--- a/pandas/core/arrays/masked.py
+++ b/pandas/core/arrays/masked.py
@@ -84,9 +84,9 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
                 "mask should be boolean numpy array. Use "
                 "the 'pd.array' function instead"
             )
-        if not values.ndim == 1:
+        if values.ndim != 1:
             raise ValueError("values must be a 1D array")
-        if not mask.ndim == 1:
+        if mask.ndim != 1:
             raise ValueError("mask must be a 1D array")
 
         if copy:
@@ -209,7 +209,8 @@ def to_numpy(
             dtype = object
         if self._hasna:
             if (
-                not (is_object_dtype(dtype) or is_string_dtype(dtype))
+                not is_object_dtype(dtype)
+                and not is_string_dtype(dtype)
                 and na_value is libmissing.NA
             ):
                 raise ValueError(
diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py
index cd48f6cbc8170..e1a424b719a4a 100644
--- a/pandas/core/arrays/numpy_.py
+++ b/pandas/core/arrays/numpy_.py
@@ -281,17 +281,15 @@ def all(self, *, axis=None, out=None, keepdims=False, skipna=True):
 
     def min(self, *, skipna: bool = True, **kwargs) -> Scalar:
         nv.validate_min((), kwargs)
-        result = masked_reductions.min(
+        return masked_reductions.min(
             values=self.to_numpy(), mask=self.isna(), skipna=skipna
         )
-        return result
 
     def max(self, *, skipna: bool = True, **kwargs) -> Scalar:
         nv.validate_max((), kwargs)
-        result = masked_reductions.max(
+        return masked_reductions.max(
             values=self.to_numpy(), mask=self.isna(), skipna=skipna
         )
-        return result
 
     def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
         nv.validate_sum((), kwargs)
diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py
index b95a7acc19b1f..e2fbf26840c22 100644
--- a/pandas/core/arrays/period.py
+++ b/pandas/core/arrays/period.py
@@ -593,7 +593,7 @@ def astype(self, dtype, copy: bool = True):
         if is_dtype_equal(dtype, self._dtype):
             if not copy:
                 return self
-            elif copy:
+            else:
                 return self.copy()
         if is_period_dtype(dtype):
             return self.asfreq(dtype.freq)
@@ -1084,11 +1084,9 @@ def _make_field_arrays(*fields):
             elif length is None:
                 length = len(x)
 
-    arrays = [
+    return [
         np.asarray(x)
         if isinstance(x, (np.ndarray, list, ABCSeries))
         else np.repeat(x, length)
         for x in fields
     ]
-
-    return arrays
diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py
index 4346e02069667..5f4cd4b269a2a 100644
--- a/pandas/core/arrays/sparse/array.py
+++ b/pandas/core/arrays/sparse/array.py
@@ -316,9 +316,8 @@ def __init__(
             raise Exception("must only pass scalars with an index")
 
         if is_scalar(data):
-            if index is not None:
-                if data is None:
-                    data = np.nan
+            if index is not None and data is None:
+                data = np.nan
 
             if index is not None:
                 npoints = len(index)
@@ -575,8 +574,7 @@ def density(self):
         >>> s.density
         0.6
         """
-        r = float(self.sp_index.npoints) / float(self.sp_index.length)
-        return r
+        return float(self.sp_index.npoints) / float(self.sp_index.length)
 
     @property
     def npoints(self) -> int:
@@ -736,25 +734,17 @@ def value_counts(self, dropna=True):
 
         keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
         fcounts = self.sp_index.ngaps
-        if fcounts > 0:
-            if self._null_fill_value and dropna:
-                pass
+        if fcounts > 0 and (not self._null_fill_value or not dropna):
+            mask = isna(keys) if self._null_fill_value else keys == self.fill_value
+            if mask.any():
+                counts[mask] += fcounts
             else:
-                if self._null_fill_value:
-                    mask = isna(keys)
-                else:
-                    mask = keys == self.fill_value
-
-                if mask.any():
-                    counts[mask] += fcounts
-                else:
-                    keys = np.insert(keys, 0, self.fill_value)
-                    counts = np.insert(counts, 0, fcounts)
+                keys = np.insert(keys, 0, self.fill_value)
+                counts = np.insert(counts, 0, fcounts)
 
         if not isinstance(keys, ABCIndexClass):
             keys = Index(keys)
-        result = Series(counts, index=keys)
-        return result
+        return Series(counts, index=keys)
 
     # --------
     # Indexing
@@ -1062,7 +1052,7 @@ def astype(self, dtype=None, copy=True):
         if is_dtype_equal(dtype, self._dtype):
             if not copy:
                 return self
-            elif copy:
+            else:
                 return self.copy()
         dtype = self.dtype.update_dtype(dtype)
         subtype = dtype._subtype_with_str
diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py
index 0d9d257810674..806e784799f4e 100644
--- a/pandas/core/arrays/timedeltas.py
+++ b/pandas/core/arrays/timedeltas.py
@@ -227,8 +227,7 @@ def _from_sequence(
         data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
         freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)
 
-        result = cls._simple_new(data, freq=freq)
-        return result
+        return cls._simple_new(data, freq=freq)
 
     @classmethod
     def _from_sequence_not_strict(
@@ -338,10 +337,9 @@ def astype(self, dtype, copy: bool = True):
             if self._hasnans:
                 # avoid double-copying
                 result = self._data.astype(dtype, copy=False)
-                values = self._maybe_mask_results(
+                return self._maybe_mask_results(
                     result, fill_value=None, convert="float64"
                 )
-                return values
             result = self._data.astype(dtype, copy=copy)
             return result.astype("i8")
         elif is_timedelta64_ns_dtype(dtype):

From a4c46e78415010a36da8d5acb0f56fbf62d93685 Mon Sep 17 00:00:00 2001
From: Marco Gorelli <m.e.gorelli@gmail.com>
Date: Wed, 4 Nov 2020 07:42:59 +0000
Subject: [PATCH 2/2] add parens, restore index loop in base.py __iter__ (fix failing doctest)

---
 pandas/core/arrays/base.py    | 3 ++-
 pandas/core/arrays/boolean.py | 8 +++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
index 3216957e1f188..82d79cc47a4ae 100644
--- a/pandas/core/arrays/base.py
+++ b/pandas/core/arrays/base.py
@@ -335,7 +335,8 @@ def __iter__(self):
         # This needs to be implemented so that pandas recognizes extension
         # arrays as list-like. The default implementation makes successive
         # calls to ``__getitem__``, which may be slower than necessary.
-        yield from self
+        for i in range(len(self)):
+            yield self[i]
 
     def __eq__(self, other: Any) -> ArrayLike:
         """
diff --git a/pandas/core/arrays/boolean.py b/pandas/core/arrays/boolean.py
index e992e7478017e..21306455573b8 100644
--- a/pandas/core/arrays/boolean.py
+++ b/pandas/core/arrays/boolean.py
@@ -170,9 +170,11 @@ def coerce_to_array(
         values[~mask_values] = values_object[~mask_values].astype(bool)
 
         # if the values were integer-like, validate it were actually 0/1's
-        if inferred_dtype in integer_like and not np.all(
-            values[~mask_values].astype(float)
-            == values_object[~mask_values].astype(float)
+        if (inferred_dtype in integer_like) and not (
+            np.all(
+                values[~mask_values].astype(float)
+                == values_object[~mask_values].astype(float)
+            )
         ):
             raise TypeError("Need to pass bool-like values")