From 152520909f92af2d908d8efc195385046f235950 Mon Sep 17 00:00:00 2001 From: J van Zundert Date: Sat, 25 Feb 2023 19:12:04 +0000 Subject: [PATCH] refactor(python): Deprecate pl.get_dummies (#7055) --- py-polars/polars/internals/functions.py | 8 ++++++++ py-polars/tests/unit/datatypes/test_categorical.py | 2 +- py-polars/tests/unit/test_df.py | 12 +++++++++--- 3 files changed, 18 insertions(+), 4 deletions(-) diff --git a/py-polars/polars/internals/functions.py b/py-polars/polars/internals/functions.py index ea32416a39ec..f987e965ead0 100644 --- a/py-polars/polars/internals/functions.py +++ b/py-polars/polars/internals/functions.py @@ -45,6 +45,9 @@ def get_dummies( """ Convert categorical variables into dummy/indicator variables. + .. deprecated:: 0.16.8 + `pl.get_dummies(df)` has been deprecated; use `df.to_dummies()` + Parameters ---------- df @@ -76,6 +79,11 @@ def get_dummies( └───────┴───────┴───────┴───────┴───────┴───────┘ """ + warnings.warn( + "`pl.get_dummies(df)` has been deprecated; use `df.to_dummies()`", + category=DeprecationWarning, + stacklevel=2, + ) return df.to_dummies(columns=columns, separator=separator) diff --git a/py-polars/tests/unit/datatypes/test_categorical.py b/py-polars/tests/unit/datatypes/test_categorical.py index 716570e9eff9..1b9fb4b65a3d 100644 --- a/py-polars/tests/unit/datatypes/test_categorical.py +++ b/py-polars/tests/unit/datatypes/test_categorical.py @@ -64,7 +64,7 @@ def test_read_csv_categorical() -> None: def test_cat_to_dummies() -> None: df = pl.DataFrame({"foo": [1, 2, 3, 4], "bar": ["a", "b", "a", "c"]}) df = df.with_columns(pl.col("bar").cast(pl.Categorical)) - assert pl.get_dummies(df).to_dict(False) == { + assert df.to_dummies().to_dict(False) == { "foo_1": [1, 0, 0, 0], "foo_2": [0, 1, 0, 0], "foo_3": [0, 0, 1, 0], diff --git a/py-polars/tests/unit/test_df.py b/py-polars/tests/unit/test_df.py index 357a69e211a1..c92fd5e2725e 100644 --- a/py-polars/tests/unit/test_df.py +++ b/py-polars/tests/unit/test_df.py @@ -800,9 +800,9 @@ def test_arg_where() -> None: assert_series_equal(pl.arg_where(s, eager=True).cast(int), pl.Series([0, 2])) -def test_get_dummies() -> None: +def test_to_dummies2() -> None: df = pl.DataFrame({"a": [1, 2, 3]}) - res = pl.get_dummies(df) + res = df.to_dummies() expected = pl.DataFrame( {"a_1": [1, 0, 0], "a_2": [0, 1, 0], "a_3": [0, 0, 1]} ).with_columns(pl.all().cast(pl.UInt8)) @@ -820,10 +820,16 @@ def test_get_dummies() -> None: }, schema={"i": pl.Int32, "category|cat": pl.UInt8, "category|dog": pl.UInt8}, ) - result = pl.get_dummies(df, columns=["category"], separator="|") + result = df.to_dummies(columns=["category"], separator="|") assert_frame_equal(result, expected) +def test_get_dummies_function_deprecated() -> None: + df = pl.DataFrame({"a": [1, 2, 3]}) + with pytest.deprecated_call(): + pl.get_dummies(df) + + def test_to_pandas(df: pl.DataFrame) -> None: # pyarrow cannot deal with unsigned dictionary integer yet. # pyarrow cannot convert a time64 w/ non-zero nanoseconds