Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Add Series.set_index #27504

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ Enhancements

.. _whatsnew_1000.enhancements.other:

-
- :class:`Series` gained a :meth:`Series.set_index` method, which makes it easier to set a new index as part of a method chain.
-

Other enhancements
Expand Down
129 changes: 128 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
"""
Data structure for 1-dimensional cross-sectional and time series data
"""
from collections import OrderedDict
from collections import OrderedDict, abc
from io import StringIO
from shutil import get_terminal_size
from textwrap import dedent
Expand Down Expand Up @@ -42,6 +42,8 @@
ABCDataFrame,
ABCDatetimeArray,
ABCDatetimeIndex,
ABCIndexClass,
ABCMultiIndex,
ABCSeries,
ABCSparseArray,
ABCSparseSeries,
Expand All @@ -67,6 +69,7 @@
InvalidIndexError,
MultiIndex,
ensure_index,
ensure_index_from_sequences,
)
from pandas.core.indexers import maybe_convert_indices
from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
Expand Down Expand Up @@ -1417,6 +1420,130 @@ def _set_value(self, label, value, takeable: bool = False):

return self

def set_index(self, labels, append=False, inplace=False, verify_integrity=False):
    """
    Set a new index for the Series.

    This method can take either:

    - an array-like to be used as labels for the new index. Its length
      must match the length of the Series.
    - a list of array-likes. The new index will be a MultiIndex and each
      item in the list will serve as a level in it.

    Parameters
    ----------
    labels : array-like or list of array-likes
        Each array must have the same length as the calling Series.
        If a list of array-likes is passed, the new index will be a
        MultiIndex. Array-like in this context means a 1D pandas object
        like Index/MultiIndex/Series, an ndarray or an iterator.
        Note that a plain ``list`` is always interpreted as a list of
        array-likes, so a flat list of scalar labels is rejected; wrap it
        in an Index, Series or ndarray (or in another list) instead.
    append : bool, default False
        If True, keep the existing index level(s) and add the new labels
        after them as additional level(s).
    inplace : bool, default False
        Modify the Series in place (do not create a new object).
    verify_integrity : bool, default False
        Check the new index for duplicates. Otherwise defer the check until
        necessary. Setting to False will improve the performance of this
        method.

    Returns
    -------
    Series or None
        Series with the new index, or None if ``inplace=True``.

    Raises
    ------
    ValueError
        If an array-like in `labels` is not one-dimensional, if an item of
        `labels` is not array-like, if the length of an array does not
        match the length of the Series, if no array is given and
        ``append=False``, or if ``verify_integrity=True`` and the new
        index contains duplicates.

    See Also
    --------
    Series.reset_index : Opposite of set_index.
    Series.reindex : Change to new indices or expand indices.
    Series.reindex_like : Change to same indices as another object.

    Examples
    --------
    >>> ser = pd.Series([1, 2, 3])
    >>> ser.set_index(pd.Index(['D', 'E', 'F']))
    D    1
    E    2
    F    3
    dtype: int64
    """
    inplace = validate_bool_kwarg(inplace, "inplace")
    if not isinstance(labels, list):
        labels = [labels]

    err_msg = (
        'The parameter "labels" may be a one-dimensional array, '
        "or a list containing only one-dimensional arrays."
    )

    # First pass: reject array-likes that are not one-dimensional. Objects
    # without an ``ndim`` attribute (lists, iterators) are treated as 1-D;
    # anything that is not array-like at all is rejected in the second pass.
    array_like_types = (ABCIndexClass, ABCSeries, np.ndarray, list, abc.Iterator)
    for col in labels:
        if isinstance(col, array_like_types) and getattr(col, "ndim", 1) != 1:
            raise ValueError(err_msg)

    arrays = []
    names = []
    if append:
        # keep every level of the current index in front of the new ones
        names = list(self.index.names)
        if isinstance(self.index, ABCMultiIndex):
            arrays.extend(
                self.index._get_level_values(i) for i in range(self.index.nlevels)
            )
        else:
            arrays.append(self.index)

    for col in labels:
        if isinstance(col, ABCMultiIndex):
            # a MultiIndex contributes one array (and name) per level
            arrays.extend(col._get_level_values(n) for n in range(col.nlevels))
            names.extend(col.names)
        elif isinstance(col, (ABCIndexClass, ABCSeries)):
            # if Index then not MultiIndex (treated above)
            arrays.append(col)
            names.append(col.name)
        elif isinstance(col, (list, np.ndarray)):
            arrays.append(col)
            names.append(None)
        elif isinstance(col, abc.Iterator):
            # iterators have no length; materialize them first
            arrays.append(list(col))
            names.append(None)
        else:
            # scalars and any other non-array-like objects are not valid levels
            raise ValueError("MultiIndex Levels must be array-like")

        if len(arrays[-1]) != len(self):
            # check newest element against length of calling ser, since
            # ensure_index_from_sequences would not raise for append=False.
            raise ValueError(
                "Length mismatch: Expected {len_self} rows, "
                "received array of length {len_col}".format(
                    len_self=len(self), len_col=len(arrays[-1])
                )
            )

    if not arrays:
        # only reachable with labels=[] and append=False; fail with a clear
        # message instead of relying on the index constructor to complain
        raise ValueError('The parameter "labels" must contain at least one array')

    index = ensure_index_from_sequences(arrays, names)

    if verify_integrity and not index.is_unique:
        duplicates = index[index.duplicated()].unique()
        raise ValueError("Index has duplicate keys: {dup}".format(dup=duplicates))

    # clear up memory usage
    index._cleanup()

    # copy only once all validation has passed, so invalid input does not
    # pay for a full copy of the data
    ser = self if inplace else self.copy()
    ser.index = index

    return None if inplace else ser

def reset_index(self, level=None, drop=False, name=None, inplace=False):
"""
Generate a new DataFrame or Series with the index reset.
Expand Down
50 changes: 50 additions & 0 deletions pandas/tests/series/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,3 +757,53 @@ def test_dt_accessor_api_for_categorical(self):
with pytest.raises(AttributeError, match=msg):
invalid.dt
assert not hasattr(invalid, "str")

@pytest.mark.parametrize("arrayklass", [iter, np.array, pd.Series, pd.Index])
def test_set_index(self, arrayklass):
    # Series.set_index with a single array-like of each supported kind,
    # plus the inplace, verify_integrity, MultiIndex and append paths.
    ser = pd.Series([0, 1, 2])

    res = ser.set_index(arrayklass(["A", "B", "C"]))
    exp = pd.Series([0, 1, 2], index=["A", "B", "C"])
    tm.assert_series_equal(res, exp)
    # without inplace=True the calling Series must keep its own index
    tm.assert_series_equal(ser, pd.Series([0, 1, 2]))

    # inplace: the caller is modified and nothing is returned
    ser = pd.Series([0, 1, 2])
    ret = ser.set_index(arrayklass(["A", "B", "C"]), inplace=True)
    assert ret is None
    exp = pd.Series([0, 1, 2], index=pd.Index(["A", "B", "C"]))
    tm.assert_series_equal(ser, exp)

    # check for duplicates
    with pytest.raises(ValueError, match="duplicate keys"):
        ser.set_index(arrayklass(["A", "B", "B"]), verify_integrity=True)

    # MultiIndex: a list of array-likes yields one level per item
    ser = pd.Series([0, 1, 2])
    levels = [pd.Series(["A", "B", "C"]), pd.Series(["x", "y", "z"])]
    ix = pd.MultiIndex.from_arrays(levels)
    res = ser.set_index(levels)
    exp = pd.Series([0, 1, 2], index=ix)
    tm.assert_series_equal(res, exp)

    # append: the existing index is kept as the first level
    ser = pd.Series([0, 1, 2])
    labels = pd.Series(["A", "B", "C"])
    ix = pd.MultiIndex.from_arrays([ser.index, labels])
    res = ser.set_index(labels, append=True)
    exp = pd.Series([0, 1, 2], index=ix)
    tm.assert_series_equal(res, exp)

    # append MultiIndex: every level of the passed index is added
    ser = pd.Series([0, 1, 2])
    level1 = pd.Series(["A", "B", "C"])
    level2 = pd.Series(["X", "Y", "Z"])
    ix = pd.MultiIndex.from_arrays([level1, level2])
    exp_ix = pd.MultiIndex.from_arrays([ser.index, level1, level2])
    res = ser.set_index(ix, append=True)
    exp = pd.Series([0, 1, 2], index=exp_ix)
    tm.assert_series_equal(res, exp)

def test_set_index_raises(self):
    # A plain list is read as a list of levels, so its scalar items are
    # each rejected as not being array-like.
    series = pd.Series([0, 1, 2])
    scalar_labels = ["A", "B", "C"]

    with pytest.raises(ValueError, match="must be array"):
        series.set_index(scalar_labels)