Skip to content

Commit

Permalink
DEPR: inplace kwarg in set_index (pandas-dev#48115)
Browse files Browse the repository at this point in the history
* DEPR: inplace kwarg in set_index

* GH ref
  • Loading branch information
jbrockmendel authored and noatamir committed Nov 9, 2022
1 parent af53fd3 commit 1151c56
Show file tree
Hide file tree
Showing 24 changed files with 87 additions and 52 deletions.
5 changes: 2 additions & 3 deletions doc/source/user_guide/indexing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1723,13 +1723,12 @@ the given columns to a MultiIndex:
frame
Other options in ``set_index`` allow you to not drop the index columns or to add
the index in-place (without creating a new object):
the index without creating a copy of the underlying data:

.. ipython:: python
data.set_index('c', drop=False)
data.set_index(['a', 'b'], inplace=True)
data
data.set_index(['a', 'b'], copy=False)
Reset the index
~~~~~~~~~~~~~~~
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -849,6 +849,7 @@ Other Deprecations
- Deprecated unused arguments ``encoding`` and ``verbose`` in :meth:`Series.to_excel` and :meth:`DataFrame.to_excel` (:issue:`47912`)
- Deprecated producing a single element when iterating over a :class:`DataFrameGroupBy` or a :class:`SeriesGroupBy` that has been grouped by a list of length 1; A tuple of length one will be returned instead (:issue:`42795`)
- Fixed up warning message of deprecation of :meth:`MultiIndex.lexsort_depth` as public method, as the message previously referred to :meth:`MultiIndex.is_lexsorted` instead (:issue:`38701`)
- Deprecated the ``inplace`` keyword in :meth:`DataFrame.set_index`, use ``df = df.set_index(..., copy=False)`` instead (:issue:`48115`)
- Deprecated the ``sort_columns`` argument in :meth:`DataFrame.plot` and :meth:`Series.plot` (:issue:`47563`).
- Deprecated positional arguments for all but the first argument of :meth:`DataFrame.to_stata` and :func:`read_stata`, use keyword arguments instead (:issue:`48128`).

Expand Down
20 changes: 17 additions & 3 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5819,7 +5819,7 @@ def set_index(
*,
drop: bool = ...,
append: bool = ...,
inplace: Literal[False] = ...,
inplace: Literal[False] | lib.NoDefault = ...,
verify_integrity: bool = ...,
copy: bool | lib.NoDefault = ...,
) -> DataFrame:
Expand All @@ -5844,7 +5844,7 @@ def set_index(
keys,
drop: bool = True,
append: bool = False,
inplace: bool = False,
inplace: bool | lib.NoDefault = lib.no_default,
verify_integrity: bool = False,
copy: bool | lib.NoDefault = lib.no_default,
) -> DataFrame | None:
Expand All @@ -5869,6 +5869,9 @@ def set_index(
Whether to append columns to existing index.
inplace : bool, default False
Whether to modify the DataFrame rather than creating a new one.
.. deprecated:: 1.5.0
verify_integrity : bool, default False
Check the new index for duplicates. Otherwise defer the check until
necessary. Setting to False will improve the performance of this
Expand Down Expand Up @@ -5942,7 +5945,18 @@ def set_index(
3 9 7 2013 84
4 16 10 2014 31
"""
inplace = validate_bool_kwarg(inplace, "inplace")
if inplace is not lib.no_default:
inplace = validate_bool_kwarg(inplace, "inplace")
warnings.warn(
"The 'inplace' keyword in DataFrame.set_index is deprecated "
"and will be removed in a future version. Use "
"`df = df.set_index(..., copy=False)` instead.",
FutureWarning,
stacklevel=find_stack_level(inspect.currentframe()),
)
else:
inplace = False

if inplace:
if copy is not lib.no_default:
raise ValueError("Cannot specify copy when inplace=True")
Expand Down
19 changes: 11 additions & 8 deletions pandas/core/reshape/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -782,9 +782,9 @@ def get_result(self, copy: bool = True) -> DataFrame:
if self.indicator:
result = self._indicator_post_merge(result)

self._maybe_add_join_keys(result, left_indexer, right_indexer)
result = self._maybe_add_join_keys(result, left_indexer, right_indexer)

self._maybe_restore_index_levels(result)
result = self._maybe_restore_index_levels(result)

self._maybe_drop_cross_column(result, self._cross)

Expand Down Expand Up @@ -851,7 +851,7 @@ def _indicator_post_merge(self, result: DataFrame) -> DataFrame:
result = result.drop(labels=["_left_indicator", "_right_indicator"], axis=1)
return result

def _maybe_restore_index_levels(self, result: DataFrame) -> None:
def _maybe_restore_index_levels(self, result: DataFrame) -> DataFrame:
"""
Restore index levels specified as `on` parameters
Expand All @@ -869,7 +869,7 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
Returns
-------
None
DataFrame
"""
names_to_restore = []
for name, left_key, right_key in zip(
Expand All @@ -893,14 +893,15 @@ def _maybe_restore_index_levels(self, result: DataFrame) -> None:
names_to_restore.append(name)

if names_to_restore:
result.set_index(names_to_restore, inplace=True)
result = result.set_index(names_to_restore, copy=False)
return result

def _maybe_add_join_keys(
self,
result: DataFrame,
left_indexer: np.ndarray | None,
right_indexer: np.ndarray | None,
) -> None:
) -> DataFrame:

left_has_missing = None
right_has_missing = None
Expand Down Expand Up @@ -996,11 +997,12 @@ def _maybe_add_join_keys(
for level_name in result.index.names
]

result.set_index(idx_list, inplace=True)
result = result.set_index(idx_list, copy=False)
else:
result.index = Index(key_col, name=name)
else:
result.insert(i, name or f"key_{i}", key_col)
return result

def _get_join_indexers(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
"""return the join indexers"""
Expand Down Expand Up @@ -1768,7 +1770,8 @@ def get_result(self, copy: bool = True) -> DataFrame:
result = self._reindex_and_concat(
join_index, left_join_indexer, right_join_indexer, copy=copy
)
self._maybe_add_join_keys(result, left_indexer, right_indexer)

result = self._maybe_add_join_keys(result, left_indexer, right_indexer)

return result

Expand Down
2 changes: 1 addition & 1 deletion pandas/io/parsers/arrow_parser_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,7 @@ def _finalize_output(self, frame: DataFrame) -> DataFrame:
# String case
if item not in frame.columns:
raise ValueError(f"Index {item} invalid")
frame.set_index(self.index_col, drop=True, inplace=True)
frame = frame.set_index(self.index_col, drop=True, copy=False)
# Clear names if headerless and no name given
if self.header is None and not multi_index_named:
frame.index.names = [None] * len(frame.index.names)
Expand Down
2 changes: 1 addition & 1 deletion pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -4663,7 +4663,7 @@ def read(
columns.insert(0, n)
s = super().read(where=where, columns=columns, start=start, stop=stop)
if is_multi_index:
s.set_index(self.levels, inplace=True)
s = s.set_index(self.levels, copy=False)

s = s.iloc[:, 0]

Expand Down
6 changes: 3 additions & 3 deletions pandas/io/sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _wrap_result(
frame = _parse_date_columns(frame, parse_dates)

if index_col is not None:
frame.set_index(index_col, inplace=True)
frame = frame.set_index(index_col, copy=False)

return frame

Expand Down Expand Up @@ -979,7 +979,7 @@ def _query_iterator(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame.set_index(self.index, inplace=True)
self.frame = self.frame.set_index(self.index, copy=False)

yield self.frame

Expand Down Expand Up @@ -1020,7 +1020,7 @@ def read(
self._harmonize_columns(parse_dates=parse_dates)

if self.index is not None:
self.frame.set_index(self.index, inplace=True)
self.frame = self.frame.set_index(self.index, copy=False)

return self.frame

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/frame/methods/test_combine_first.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,8 @@ def test_combine_first_string_dtype_only_na(self, nullable_string_dtype):
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
)
df2 = DataFrame({"a": ["85"], "b": [pd.NA]}, dtype=nullable_string_dtype)
df.set_index(["a", "b"], inplace=True)
df2.set_index(["a", "b"], inplace=True)
df = df.set_index(["a", "b"], copy=False)
df2 = df2.set_index(["a", "b"], copy=False)
result = df.combine_first(df2)
expected = DataFrame(
{"a": ["962", "85"], "b": [pd.NA] * 2}, dtype=nullable_string_dtype
Expand Down
11 changes: 8 additions & 3 deletions pandas/tests/frame/methods/test_set_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ def test_set_index_copy(self):

msg = "Cannot specify copy when inplace=True"
with pytest.raises(ValueError, match=msg):
df.set_index("A", inplace=True, copy=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=True)
with pytest.raises(ValueError, match=msg):
df.set_index("A", inplace=True, copy=False)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace'"):
df.set_index("A", inplace=True, copy=False)

def test_set_index_multiindex(self):
# segfault in GH#3308
Expand Down Expand Up @@ -197,7 +199,10 @@ def test_set_index_drop_inplace(self, frame_of_index_cols, drop, inplace, keys):

if inplace:
result = df.copy()
return_value = result.set_index(keys, drop=drop, inplace=True)
with tm.assert_produces_warning(
FutureWarning, match="The 'inplace' keyword"
):
return_value = result.set_index(keys, drop=drop, inplace=True)
assert return_value is None
else:
result = df.set_index(keys, drop=drop)
Expand Down
3 changes: 2 additions & 1 deletion pandas/tests/frame/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,7 +244,8 @@ def _check_f(base, f):

# set_index
f = lambda x: x.set_index("a", inplace=True)
_check_f(data.copy(), f)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
_check_f(data.copy(), f)

# reset_index
f = lambda x: x.reset_index(inplace=True)
Expand Down
18 changes: 12 additions & 6 deletions pandas/tests/frame/test_query_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -436,7 +436,8 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
Expand All @@ -449,7 +450,8 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("index < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index < "20130101") & ("20130101" < df.dates3)]
Expand All @@ -463,7 +465,8 @@ def test_date_index_query_with_NaT_duplicates(self):
d["dates3"] = date_range("1/1/2014", periods=n)
df = DataFrame(d)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query("dates1 < 20130101 < dates3", engine=engine, parser=parser)
expec = df[(df.index.to_series() < "20130101") & ("20130101" < df.dates3)]
Expand Down Expand Up @@ -794,7 +797,8 @@ def test_date_index_query(self):
df = DataFrame(np.random.randn(n, 3))
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
Expand All @@ -809,7 +813,8 @@ def test_date_index_query_with_NaT(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.iloc[0, 0] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
res = df.query(
"(index < 20130101) & (20130101 < dates3)", engine=engine, parser=parser
Expand All @@ -824,7 +829,8 @@ def test_date_index_query_with_NaT_duplicates(self):
df["dates1"] = date_range("1/1/2012", periods=n)
df["dates3"] = date_range("1/1/2014", periods=n)
df.loc[np.random.rand(n) > 0.5, "dates1"] = pd.NaT
return_value = df.set_index("dates1", inplace=True, drop=True)
with tm.assert_produces_warning(FutureWarning, match="The 'inplace' keyword"):
return_value = df.set_index("dates1", inplace=True, drop=True)
assert return_value is None
msg = r"'BoolOp' nodes are not implemented"
with pytest.raises(NotImplementedError, match=msg):
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/groupby/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,7 +678,7 @@ def test_apply_groupby_datetimeindex():
result = df.groupby("Name").sum()

expected = DataFrame({"Name": ["A", "B", "C"], "Value": [10, 50, 90]})
expected.set_index("Name", inplace=True)
expected = expected.set_index("Name", copy=False)

tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ def test_builtins_apply(keys, f):

if f != sum:
expected = gb.agg(fname).reset_index()
expected.set_index(keys, inplace=True, drop=False)
expected = expected.set_index(keys, copy=False, drop=False)
tm.assert_frame_equal(result, expected, check_dtype=False)

tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
Expand Down Expand Up @@ -454,7 +454,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data):
df_out = DataFrame(exp)

df_out["b"] = df_out.b.astype(out_type)
df_out.set_index("a", inplace=True)
df_out = df_out.set_index("a", copy=False)

grpd = df.groupby("a")
t = getattr(grpd, method)(*data["args"])
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/indexes/multi/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_insert(idx):
idx.insert(0, ("foo2",))

left = pd.DataFrame([["a", "b", 0], ["b", "d", 1]], columns=["1st", "2nd", "3rd"])
left.set_index(["1st", "2nd"], inplace=True)
left = left.set_index(["1st", "2nd"], copy=False)
ts = left["3rd"].copy(deep=True)

left.loc[("b", "x"), "3rd"] = 2
Expand Down Expand Up @@ -65,7 +65,7 @@ def test_insert(idx):
],
columns=["1st", "2nd", "3rd"],
)
right.set_index(["1st", "2nd"], inplace=True)
right = right.set_index(["1st", "2nd"], copy=False)
# FIXME: the data type changes to float because
# of intermediate NaN insertion.
tm.assert_frame_equal(left, right, check_dtype=False)
Expand Down
7 changes: 5 additions & 2 deletions pandas/tests/indexing/multiindex/test_indexing_slow.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,15 +60,18 @@ def validate(mi, df, key):
assert key[: i + 1] in mi.index
right = df[mask].copy()

msg = "The 'inplace' keyword in DataFrame.set_index is deprecated"
if i + 1 != len(key): # partial key
return_value = right.drop(cols[: i + 1], axis=1, inplace=True)
assert return_value is None
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = right.set_index(cols[i + 1 : -1], inplace=True)
assert return_value is None
tm.assert_frame_equal(mi.loc[key[: i + 1]], right)

else: # full key
return_value = right.set_index(cols[:-1], inplace=True)
with tm.assert_produces_warning(FutureWarning, match=msg):
return_value = right.set_index(cols[:-1], inplace=True)
assert return_value is None
if len(right) == 1: # single hit
right = Series(
Expand Down
2 changes: 1 addition & 1 deletion pandas/tests/indexing/multiindex/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ def test_multiindex_complex(self):
"z": non_complex_data,
}
)
result.set_index(["x", "y"], inplace=True)
result = result.set_index(["x", "y"], copy=False)
expected = DataFrame(
{"z": non_complex_data},
index=MultiIndex.from_arrays(
Expand Down
4 changes: 2 additions & 2 deletions pandas/tests/io/pytables/test_append.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def test_append_series(setup_path):
mi["B"] = np.arange(len(mi))
mi["C"] = "foo"
mi.loc[3:5, "C"] = "bar"
mi.set_index(["C", "B"], inplace=True)
mi = mi.set_index(["C", "B"], copy=False)
s = mi.stack()
s.index = s.index.droplevel(2)
store.append("mi", s)
Expand Down Expand Up @@ -326,7 +326,7 @@ def test_append_with_different_block_ordering(setup_path):
a = df.pop("A")
df["A"] = a

df.set_index("index", inplace=True)
df = df.set_index("index", copy=False)

store.append("df", df)

Expand Down
Loading

0 comments on commit 1151c56

Please sign in to comment.