Skip to content

Commit

Permalink
PERF: improve efficiency of BaseMaskedArray.__setitem__
Browse files Browse the repository at this point in the history
This partially addresses #44172, though that issue won't be fully resolved until 2D `ExtensionArray`s are supported (per the comments there).
  • Loading branch information
alexreg committed Nov 28, 2021
1 parent 040f236 commit fb2b5fe
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 15 deletions.
6 changes: 3 additions & 3 deletions pandas/core/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def frame_apply(
args=None,
kwargs=None,
) -> FrameApply:
"""construct and return a row or column based frame apply object"""
"""Construct and return a row- or column-based frame apply object."""
axis = obj._get_axis_number(axis)
klass: type[FrameApply]
if axis == 0:
Expand Down Expand Up @@ -693,7 +693,7 @@ def dtypes(self) -> Series:
return self.obj.dtypes

def apply(self) -> DataFrame | Series:
"""compute the results"""
"""Compute the results."""
# dispatch to agg
if is_list_like(self.f):
return self.apply_multiple()
Expand Down Expand Up @@ -1011,7 +1011,7 @@ def result_columns(self) -> Index:
def wrap_results_for_axis(
self, results: ResType, res_index: Index
) -> DataFrame | Series:
"""return the results for the columns"""
"""Return the results for the columns."""
result: DataFrame | Series

# we have requested to expand
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,9 @@ def map_string(s):
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value)

def _validate_setitem_value(self, value):
    """
    Report whether ``value`` is a boolean scalar.

    Returns the result of ``lib.is_bool(value)`` unchanged.
    """
    is_boolean_scalar = lib.is_bool(value)
    return is_boolean_scalar

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/floating.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,9 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

def _validate_setitem_value(self, value):
    """
    Report whether ``value`` is a real-number scalar.

    Returns True for float scalars and for integer scalars, False otherwise.
    Integers are accepted in addition to floats so that ``arr[i] = 1`` is
    not rejected merely because the literal is not a float.
    """
    return lib.is_float(value) or lib.is_integer(value)

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -338,6 +338,9 @@ def _from_sequence_of_strings(
def _coerce_to_array(self, value) -> tuple[np.ndarray, np.ndarray]:
return coerce_to_array(value, dtype=self.dtype)

def _validate_setitem_value(self, value):
    """
    Report whether ``value`` is a scalar holding a whole number.

    Returns True for integer scalars and for float scalars whose value is
    integral (e.g. ``2.0``); returns False for everything else, including
    fractional floats, NaN and infinities.

    Only the kind of scalar is checked here; whether the number fits in the
    array's integer width is not verified.
    """
    if lib.is_integer(value):
        return True
    # ``float(...)`` normalises NumPy float scalars so ``is_integer`` is
    # always available; NaN and +/-inf report False.
    return lib.is_float(value) and float(value).is_integer()

@overload
def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
...
Expand Down
29 changes: 17 additions & 12 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import (
array_equivalent,
is_valid_na_for_dtype,
isna,
notna,
)
Expand Down Expand Up @@ -82,7 +83,7 @@

class BaseMaskedDtype(ExtensionDtype):
"""
Base class for dtypes for BasedMaskedArray subclasses.
Base class for dtypes for BaseMaskedArray subclasses.
"""

name: str
Expand Down Expand Up @@ -213,19 +214,23 @@ def fillna(
def _coerce_to_array(self, values) -> tuple[np.ndarray, np.ndarray]:
raise AbstractMethodError(self)

def __setitem__(self, key, value) -> None:
_is_scalar = is_scalar(value)
if _is_scalar:
value = [value]
value, mask = self._coerce_to_array(value)

if _is_scalar:
value = value[0]
mask = mask[0]
def _validate_setitem_value(self, value) -> bool:
    """
    Abstract hook; this base implementation always raises.

    Raises
    ------
    AbstractMethodError
        Always.
    """
    not_implemented = AbstractMethodError(self)
    raise not_implemented

def __setitem__(self, key, value) -> None:
    """
    Assign ``value`` at ``key``, updating both the data and the mask.

    Non-scalar values are converted with ``_coerce_to_array`` and the
    resulting values and mask are written at ``key``.

    Scalar values avoid that conversion:

    * a missing value that is valid for this dtype only sets the mask;
    * a value accepted by ``_validate_setitem_value`` is written to the
      data and the mask is cleared;
    * anything else is rejected.

    Raises
    ------
    TypeError
        If a scalar ``value`` is neither a valid missing value for this
        dtype nor accepted by ``_validate_setitem_value``.
    """
    key = check_array_indexer(self, key)

    if not is_scalar(value):
        value, mask = self._coerce_to_array(value)
        self._data[key] = value
        self._mask[key] = mask
        return

    # Test for NA before validating: NaN is itself a float scalar, so a
    # float validator would otherwise store it as data instead of masking.
    if isna(value) and is_valid_na_for_dtype(value, self.dtype):
        self._mask[key] = True
    elif self._validate_setitem_value(value):
        self._data[key] = value
        self._mask[key] = False
    else:
        raise TypeError(f"Invalid value '{value}' for dtype {self.dtype}")

def __iter__(self):
if self.ndim == 1:
Expand Down

0 comments on commit fb2b5fe

Please sign in to comment.