Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Array.__setitem__ failing with nullable boolean mask #31484

Merged
merged 20 commits into from
Feb 1, 2020
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.1.rst
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ Indexing

-
-
- Bug where assigning to a ``Series`` using an IntegerArray / BooleanArray as a mask would raise ``TypeError`` (:issue:`31446`)
Copy link
Member

@MarcoGorelli MarcoGorelli Feb 1, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:class:`Series`

instead of

``Series``

so that there's a link in the docs?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah, tbh i wasn't quite sure about the rule here, since i see three different representations used in whatsnew

:class:`Series`, ``Series``, Series

but it seems that using :class:`Series` is more consistent, so I will change it


Missing
^^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core import nanops, ops
from pandas.core.indexers import check_array_indexer

from .masked import BaseMaskedArray

Expand Down Expand Up @@ -369,6 +370,7 @@ def __setitem__(self, key, value):
value = value[0]
mask = mask[0]

key = check_array_indexer(self, key)
self._data[key] = value
self._mask[key] = mask

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -2073,6 +2073,8 @@ def __setitem__(self, key, value):

lindexer = self.categories.get_indexer(rvalue)
lindexer = self._maybe_coerce_indexer(lindexer)

key = check_array_indexer(self, key)
self._codes[key] = lindexer

def _reverse_indexer(self) -> Dict[Hashable, np.ndarray]:
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -601,6 +601,8 @@ def __setitem__(
f"or array of those. Got '{type(value).__name__}' instead."
)
raise TypeError(msg)

key = check_array_indexer(self, key)
self._data[key] = value
self._maybe_clear_freq()

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from pandas.core.dtypes.missing import isna

from pandas.core import nanops, ops
from pandas.core.indexers import check_array_indexer
from pandas.core.ops import invalid_comparison
from pandas.core.ops.common import unpack_zerodim_and_defer
from pandas.core.tools.numeric import to_numeric
Expand Down Expand Up @@ -414,6 +415,7 @@ def __setitem__(self, key, value):
value = value[0]
mask = mask[0]

key = check_array_indexer(self, key)
jreback marked this conversation as resolved.
Show resolved Hide resolved
self._data[key] = value
self._mask[key] = mask

Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,7 @@ def __setitem__(self, key, value):
msg = f"'value' should be an interval type, got {type(value)} instead."
raise TypeError(msg)

key = check_array_indexer(self, key)
# Need to ensure that left and right are updated atomically, so we're
# forced to copy, update the copy, and swap in the new values.
left = self.left.copy(deep=True)
Expand Down
1 change: 1 addition & 0 deletions pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ def __getitem__(self, item):
def __setitem__(self, key, value):
value = extract_array(value, extract_numpy=True)

key = check_array_indexer(self, key)
scalar_key = lib.is_scalar(key)
scalar_value = lib.is_scalar(value)

Expand Down
2 changes: 2 additions & 0 deletions pandas/core/arrays/string_.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from pandas.core import ops
from pandas.core.arrays import PandasArray
from pandas.core.construction import extract_array
from pandas.core.indexers import check_array_indexer
from pandas.core.missing import isna


Expand Down Expand Up @@ -224,6 +225,7 @@ def __setitem__(self, key, value):
# extract_array doesn't extract PandasArray subclasses
value = value._ndarray

key = check_array_indexer(self, key)
scalar_key = lib.is_scalar(key)
scalar_value = lib.is_scalar(value)
if scalar_key and not scalar_value:
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/arrays/test_integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,6 +1074,23 @@ def test_cut(bins, right, include_lowest):
tm.assert_categorical_equal(result, expected)


def test_array_setitem_nullable_boolean_mask():
TomAugspurger marked this conversation as resolved.
Show resolved Hide resolved
# GH 31446
ser = pd.Series([1, 2], dtype="Int64")
result = ser.where(ser > 1)
expected = pd.Series([pd.NA, 2], dtype="Int64")
tm.assert_series_equal(result, expected)


def test_array_setitem():
    """Setting values through a nullable boolean mask updates the array in place."""
    # GH 31446
    arr = pd.Series([1, 2], dtype="Int64").array
    # The comparison on an Int64 array yields a nullable boolean mask rather
    # than a plain NumPy bool ndarray; __setitem__ must accept it as an indexer.
    arr[arr > 1] = 1

    expected = pd.array([1, 1], dtype="Int64")
    tm.assert_extension_array_equal(arr, expected)


# TODO(jreback) - these need testing / are broken

# shift
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/extension/base/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest

import pandas as pd
from pandas.core.arrays.numpy_ import PandasDtype

from .base import BaseExtensionTests

Expand Down Expand Up @@ -195,3 +196,14 @@ def test_setitem_preserves_views(self, data):
data[0] = data[1]
assert view1[0] == data[1]
assert view2[0] == data[1]

def test_setitem_nullable_mask(self, data):
# GH 31446
# TODO: there is some issue with PandasArray, therefore,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you create an issue for this? (e.g. PandasArray support for setitem)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

will do

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@charlesdong1991 is there an open issue for this yet? I haven't found one, and it causes the geopandas CI to fail because, contrary to the comment, this test is not skipped.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@martinfleis The issue for geopandas is not due to PandasArray not working, but rather because we need to update our __setitem__ implementation (similar to how I updated the __getitem__), will do a PR for that.

# TODO: skip the setitem test for now, and fix it later
if data.dtype != PandasDtype("object"):
arr = data[:5]
expected = data.take([0, 0, 0, 3, 4])
mask = pd.array([True, True, True, False, False])
arr[mask] = data[0]
self.assert_extension_array_equal(expected, arr)
3 changes: 3 additions & 0 deletions pandas/tests/extension/decimal/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd
from pandas.api.extensions import no_default, register_extension_dtype
from pandas.core.arrays import ExtensionArray, ExtensionScalarOpsMixin
from pandas.core.indexers import check_array_indexer


@register_extension_dtype
Expand Down Expand Up @@ -138,6 +139,8 @@ def __setitem__(self, key, value):
value = [decimal.Decimal(v) for v in value]
else:
value = decimal.Decimal(value)

key = check_array_indexer(self, key)
self._data[key] = value

def __len__(self) -> int:
Expand Down