Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLN refactor core/arrays #37581

Merged
merged 3 commits into from
Nov 4, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 4 additions & 5 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ def astype(self, dtype, copy=True):
if is_dtype_equal(dtype, self.dtype):
if not copy:
return self
elif copy:
else:
return self.copy()
if isinstance(dtype, StringDtype): # allow conversion to StringArrays
return dtype.construct_array_type()._from_sequence(self, copy=False)
Expand Down Expand Up @@ -544,14 +544,13 @@ def argsort(
ascending = nv.validate_argsort_with_ascending(ascending, args, kwargs)

values = self._values_for_argsort()
result = nargsort(
return nargsort(
values,
kind=kind,
ascending=ascending,
na_position=na_position,
mask=np.asarray(self.isna()),
)
return result

def argmin(self):
"""
Expand Down Expand Up @@ -780,12 +779,12 @@ def equals(self, other: object) -> bool:
boolean
Whether the arrays are equivalent.
"""
if not type(self) == type(other):
if type(self) != type(other):
return False
other = cast(ExtensionArray, other)
if not is_dtype_equal(self.dtype, other.dtype):
return False
elif not len(self) == len(other):
elif len(self) != len(other):
return False
else:
equal_values = self == other
Expand Down
20 changes: 10 additions & 10 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,12 +170,13 @@ def coerce_to_array(
values[~mask_values] = values_object[~mask_values].astype(bool)

# if the values were integer-like, validate that they were actually 0/1's
if inferred_dtype in integer_like:
if not np.all(
if (inferred_dtype in integer_like) and not (
np.all(
values[~mask_values].astype(float)
== values_object[~mask_values].astype(float)
):
raise TypeError("Need to pass bool-like values")
)
):
raise TypeError("Need to pass bool-like values")

if mask is None and mask_values is None:
mask = np.zeros(len(values), dtype=bool)
Expand All @@ -193,9 +194,9 @@ def coerce_to_array(
if mask_values is not None:
mask = mask | mask_values

if not values.ndim == 1:
if values.ndim != 1:
raise ValueError("values must be a 1D list-like")
if not mask.ndim == 1:
if mask.ndim != 1:
raise ValueError("mask must be a 1D list-like")

return values, mask
Expand Down Expand Up @@ -395,9 +396,8 @@ def astype(self, dtype, copy: bool = True) -> ArrayLike:
self._data.astype(dtype.numpy_dtype), self._mask.copy(), copy=False
)
# for integer, error if there are missing values
if is_integer_dtype(dtype):
if self._hasna:
raise ValueError("cannot convert NA to integer")
if is_integer_dtype(dtype) and self._hasna:
raise ValueError("cannot convert NA to integer")
# for float dtype, ensure we use np.nan before casting (numpy cannot
# deal with pd.NA)
na_value = self._na_value
Expand Down Expand Up @@ -576,7 +576,7 @@ def _logical_method(self, other, op):
elif isinstance(other, np.bool_):
other = other.item()

if other_is_scalar and not (other is libmissing.NA or lib.is_bool(other)):
if other_is_scalar and other is not libmissing.NA and not lib.is_bool(other):
raise TypeError(
"'other' should be pandas.NA or a bool. "
f"Got {type(other).__name__} instead."
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -1314,8 +1314,7 @@ def isna(self):
Categorical.notna : Boolean inverse of Categorical.isna.

"""
ret = self._codes == -1
return ret
return self._codes == -1

isnull = isna

Expand Down Expand Up @@ -1363,7 +1362,7 @@ def value_counts(self, dropna=True):
from pandas import CategoricalIndex, Series

code, cat = self._codes, self.categories
ncat, mask = len(cat), 0 <= code
ncat, mask = (len(cat), code >= 0)
ix, clean = np.arange(ncat), mask.all()

if dropna or clean:
Expand Down Expand Up @@ -1920,8 +1919,7 @@ def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
)
counts = counts.cumsum()
_result = (r[start:end] for start, end in zip(counts, counts[1:]))
result = dict(zip(categories, _result))
return result
return dict(zip(categories, _result))

# ------------------------------------------------------------------
# Reductions
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -1063,8 +1063,7 @@ def _time_shift(self, periods, freq=None):
if isinstance(freq, str):
freq = to_offset(freq)
offset = periods * freq
result = self + offset
return result
return self + offset

if periods == 0 or len(self) == 0:
# GH#14811 empty case
Expand Down Expand Up @@ -1534,10 +1533,9 @@ def _round(self, freq, mode, ambiguous, nonexistent):
self = cast("DatetimeArray", self)
naive = self.tz_localize(None)
result = naive._round(freq, mode, ambiguous, nonexistent)
aware = result.tz_localize(
return result.tz_localize(
self.tz, ambiguous=ambiguous, nonexistent=nonexistent
)
return aware

values = self.view("i8")
result = round_nsint64(values, mode, freq)
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,9 @@ def __init__(self, values: np.ndarray, mask: np.ndarray, copy: bool = False):
"mask should be boolean numpy array. Use "
"the 'pd.array' function instead"
)
if not values.ndim == 1:
if values.ndim != 1:
raise ValueError("values must be a 1D array")
if not mask.ndim == 1:
if mask.ndim != 1:
raise ValueError("mask must be a 1D array")

if copy:
Expand Down Expand Up @@ -209,7 +209,8 @@ def to_numpy(
dtype = object
if self._hasna:
if (
not (is_object_dtype(dtype) or is_string_dtype(dtype))
not is_object_dtype(dtype)
and not is_string_dtype(dtype)
and na_value is libmissing.NA
):
raise ValueError(
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,17 +281,15 @@ def all(self, *, axis=None, out=None, keepdims=False, skipna=True):

def min(self, *, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_min((), kwargs)
result = masked_reductions.min(
return masked_reductions.min(
values=self.to_numpy(), mask=self.isna(), skipna=skipna
)
return result

def max(self, *, skipna: bool = True, **kwargs) -> Scalar:
nv.validate_max((), kwargs)
result = masked_reductions.max(
return masked_reductions.max(
values=self.to_numpy(), mask=self.isna(), skipna=skipna
)
return result

def sum(self, *, axis=None, skipna=True, min_count=0, **kwargs) -> Scalar:
nv.validate_sum((), kwargs)
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ def astype(self, dtype, copy: bool = True):
if is_dtype_equal(dtype, self._dtype):
if not copy:
return self
elif copy:
else:
return self.copy()
if is_period_dtype(dtype):
return self.asfreq(dtype.freq)
Expand Down Expand Up @@ -1080,11 +1080,9 @@ def _make_field_arrays(*fields):
elif length is None:
length = len(x)

arrays = [
return [
np.asarray(x)
if isinstance(x, (np.ndarray, list, ABCSeries))
else np.repeat(x, length)
for x in fields
]

return arrays
32 changes: 11 additions & 21 deletions pandas/core/arrays/sparse/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -316,9 +316,8 @@ def __init__(
raise Exception("must only pass scalars with an index")

if is_scalar(data):
if index is not None:
if data is None:
data = np.nan
if index is not None and data is None:
data = np.nan

if index is not None:
npoints = len(index)
Expand Down Expand Up @@ -575,8 +574,7 @@ def density(self):
>>> s.density
0.6
"""
r = float(self.sp_index.npoints) / float(self.sp_index.length)
return r
return float(self.sp_index.npoints) / float(self.sp_index.length)

@property
def npoints(self) -> int:
Expand Down Expand Up @@ -736,25 +734,17 @@ def value_counts(self, dropna=True):

keys, counts = algos.value_counts_arraylike(self.sp_values, dropna=dropna)
fcounts = self.sp_index.ngaps
if fcounts > 0:
if self._null_fill_value and dropna:
pass
if fcounts > 0 and (not self._null_fill_value or not dropna):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is nice, but it may have changed the semantics.

mask = isna(keys) if self._null_fill_value else keys == self.fill_value
if mask.any():
counts[mask] += fcounts
else:
if self._null_fill_value:
mask = isna(keys)
else:
mask = keys == self.fill_value

if mask.any():
counts[mask] += fcounts
else:
keys = np.insert(keys, 0, self.fill_value)
counts = np.insert(counts, 0, fcounts)
keys = np.insert(keys, 0, self.fill_value)
counts = np.insert(counts, 0, fcounts)

if not isinstance(keys, ABCIndexClass):
keys = Index(keys)
result = Series(counts, index=keys)
return result
return Series(counts, index=keys)

# --------
# Indexing
Expand Down Expand Up @@ -1062,7 +1052,7 @@ def astype(self, dtype=None, copy=True):
if is_dtype_equal(dtype, self._dtype):
if not copy:
return self
elif copy:
else:
return self.copy()
dtype = self.dtype.update_dtype(dtype)
subtype = dtype._subtype_with_str
Expand Down
6 changes: 2 additions & 4 deletions pandas/core/arrays/timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,8 +227,7 @@ def _from_sequence(
data, inferred_freq = sequence_to_td64ns(data, copy=copy, unit=None)
freq, _ = dtl.validate_inferred_freq(None, inferred_freq, False)

result = cls._simple_new(data, freq=freq)
return result
return cls._simple_new(data, freq=freq)

@classmethod
def _from_sequence_not_strict(
Expand Down Expand Up @@ -334,10 +333,9 @@ def astype(self, dtype, copy: bool = True):
if self._hasnans:
# avoid double-copying
result = self._data.astype(dtype, copy=False)
values = self._maybe_mask_results(
return self._maybe_mask_results(
result, fill_value=None, convert="float64"
)
return values
result = self._data.astype(dtype, copy=copy)
return result.astype("i8")
elif is_timedelta64_ns_dtype(dtype):
Expand Down