-
-
Notifications
You must be signed in to change notification settings - Fork 18k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ExtensionArray.take default implementation #20814
Changes from 3 commits
fb3c234
dacd98e
0be9ec6
08f2479
eba137f
67ba9dd
37915e9
c721915
125ca0b
b7ae0bc
338566f
31cd304
05d8844
69e7fe7
449983b
c449afd
82cad8b
d5470a0
bbcbf19
1a4d987
fc729d6
74b2c09
5db6624
741f284
fbc4425
f3b91ca
eecd632
9a6c7d4
7c4f625
eb43fa4
6858409
ec0cecd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1448,8 +1448,9 @@ def func(arr, indexer, out, fill_value=np.nan): | |
return func | ||
|
||
|
||
def take(arr, indexer, allow_fill=False, fill_value=None): | ||
"""Take elements from an array. | ||
def take(arr, indices, allow_fill=False, fill_value=None): | ||
""" | ||
Take elements from an array. | ||
|
||
.. versionadded:: 0.23.0 | ||
|
||
|
@@ -1458,22 +1459,23 @@ def take(arr, indexer, allow_fill=False, fill_value=None): | |
arr : sequence | ||
Non array-likes (sequences without a dtype) are coerced | ||
to an ndarray. | ||
indexer : sequence of integers | ||
indices : sequence of integers | ||
Indices to be taken. | ||
allow_fill : bool, default False | ||
How to handle negative values in `indexer`. | ||
How to handle negative values in `indices`. | ||
|
||
* False: negative values in `indexer` indicate | ||
slices from the right (the default) | ||
* False: negative values in `indices` indicate positional indices | ||
from the right (the default). This is similar to :func:`numpy.take`. | ||
|
||
* True: negative values in `indexer` indicate | ||
* True: negative values in `indices` indicate | ||
missing values. These values are set to `fill_value`. Any other | ||
negative values raise a ``ValueError``. | ||
|
||
fill_value : any, optional | ||
Fill value to use for NA-indices when `allow_fill` is True. | ||
This may be ``None``, in which case the default NA value for | ||
the type, ``self.dtype.na_value``, is used. | ||
the type is used. For ndarrays, :attr:`numpy.nan` is used. For | ||
ExtensionArrays, a different value may be used. | ||
|
||
Returns | ||
------- | ||
|
@@ -1483,17 +1485,17 @@ def take(arr, indexer, allow_fill=False, fill_value=None): | |
Raises | ||
------ | ||
IndexError | ||
When the indexer is out of bounds for the array. | ||
When `indices` is out of bounds for the array. | ||
ValueError | ||
When the indexer contains negative values other than ``-1`` | ||
and `allow_fill` is True. | ||
|
||
Notes | ||
----- | ||
When `allow_fill` is False, `indexer` may be whatever dimensionality | ||
When `allow_fill` is False, `indices` may be whatever dimensionality | ||
is accepted by NumPy for `arr`. | ||
|
||
When `allow_fill` is True, `indexer` should be 1-D. | ||
When `allow_fill` is True, `indices` should be 1-D. | ||
|
||
See Also | ||
-------- | ||
|
@@ -1504,7 +1506,7 @@ def take(arr, indexer, allow_fill=False, fill_value=None): | |
>>> from pandas.api.extensions import take | ||
|
||
With the default ``allow_fill=False``, negative numbers indicate | ||
slices from the right. | ||
positional indicies from the right. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. indicies -> indices |
||
|
||
>>> take(np.array([10, 20, 30]), [0, 0, -1]) | ||
array([10, 10, 30]) | ||
|
@@ -1524,15 +1526,15 @@ def take(arr, indexer, allow_fill=False, fill_value=None): | |
arr = np.asarray(arr) | ||
|
||
# Do we require int64 or intp here? | ||
indexer = np.asarray(indexer, dtype='int') | ||
indices = np.asarray(indices, dtype='int') | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. should be intp, which is what take accepts for indexers |
||
|
||
if allow_fill: | ||
# Pandas style, -1 means NA | ||
validate_indices(indexer, len(arr)) | ||
result = take_1d(arr, indexer, allow_fill=True, fill_value=fill_value) | ||
validate_indices(indices, len(arr)) | ||
result = take_1d(arr, indices, allow_fill=True, fill_value=fill_value) | ||
else: | ||
# NumPy style | ||
result = arr.take(indexer) | ||
result = arr.take(indices) | ||
return result | ||
|
||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -463,45 +463,51 @@ def factorize(self, na_sentinel=-1): | |
# Indexing methods | ||
# ------------------------------------------------------------------------ | ||
|
||
def take(self, indexer, allow_fill=False, fill_value=None): | ||
def take(self, indices, allow_fill=False, fill_value=None): | ||
# type: (Sequence[int], bool, Optional[Any]) -> ExtensionArray | ||
"""Take elements from an array. | ||
|
||
Parameters | ||
---------- | ||
indexer : sequence of integers | ||
Indices to be taken. See Notes for how negative indicies | ||
are handled. | ||
indices : sequence of integers | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. can you share this doc? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. we want people to look at the source code of this (as we explicitly say the code of this base class is kind of the documentation), so I would certainly try to keep this one here (not sure if it would be easy to reuse it for the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sharing docs between these two are a bit difficult because of circular import issues. The docstring would have to go in a third module, which isn't desirable. |
||
Indices to be taken. | ||
allow_fill : bool, default False | ||
How to handle negative values in `indexer`. | ||
How to handle negative values in `indices`. | ||
|
||
For False values (the default), negative values in `indexer` | ||
indiciate slices from the right. | ||
For False values (the default), negative values in `indices` | ||
indicate positional indices from the right. | ||
|
||
For True values, indicies where `indexer` is ``-1`` indicate | ||
For True values, indices where `indices` is ``-1`` indicate | ||
missing values. These values are set to `fill_value`. Any other | ||
negative value should raise a ``ValueError``. | ||
|
||
fill_value : any, optional | ||
Fill value to use for NA-indices when `allow_fill` is True. | ||
This may be ``None``, in which case the default NA value for | ||
the type, ``self.dtype.na_value``, is used. | ||
|
||
For many ExtensionArrays, there will be two representations of | ||
`fill_value`: a user-facing "boxed" scalar, and a low-level | ||
physical NA value. `fill_value` should be the user-facing version, | ||
and the implementation should handle translating that to the | ||
physical version for processing the take if necessary. | ||
|
||
Returns | ||
------- | ||
ExtensionArray | ||
|
||
Raises | ||
------ | ||
IndexError | ||
When the indexer is out of bounds for the array. | ||
When the indices are out of bounds for the array. | ||
ValueError | ||
When the indexer contains negative values other than ``-1`` | ||
When `indices` contains negative values other than ``-1`` | ||
and `allow_fill` is True. | ||
|
||
Notes | ||
----- | ||
ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, | ||
``iloc``, when the indexer is a sequence of values. Additionally, | ||
``iloc``, when `indices` is a sequence of values. Additionally, | ||
it's called by :meth:`Series.reindex`, or any other method | ||
that causes realignment, with a `fill_value`. | ||
|
||
|
@@ -518,14 +524,17 @@ def take(self, indexer, allow_fill=False, fill_value=None): | |
|
||
.. code-block:: python | ||
|
||
def take(self, indexer, allow_fill=False, fill_value=None): | ||
def take(self, indices, allow_fill=False, fill_value=None): | ||
from pandas.core.algorithms import take | ||
|
||
# If the ExtensionArray is backed by an ndarray, then | ||
# just pass that here instead of coercing to object. | ||
data = self.astype(object) | ||
|
||
if allow_fill and fill_value is None: | ||
fill_value = self.dtype.na_value | ||
|
||
result = take(data, indexer, fill_value=fill_value, | ||
result = take(data, indices, fill_value=fill_value, | ||
allow_fill=allow_fill) | ||
return self._from_sequence(result) | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,7 +17,8 @@ class _DtypeOpsMixin(object): | |
# class's methods can be moved to ExtensionDtype and removed. | ||
|
||
# na_value is the default NA value to use for this type. This is used in | ||
# e.g. ExtensionArray.take. | ||
# e.g. ExtensionArray.take. This should be the user-facing "boxed" version | ||
# of the NA value, not the physical NA value for storage. | ||
na_value = np.nan | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. maybe even show an example (from JSON?) |
||
|
||
def __eq__(self, other): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
"This may be None" -> "By default" ? (or at least mention "default" somewhere)