Skip to content

Commit

Permalink
feat(python): add "map_dict" method for Series
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-beedie committed Feb 16, 2023
1 parent d94688d commit cc64734
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 15 deletions.
1 change: 1 addition & 0 deletions py-polars/docs/source/reference/series/computation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ Computation
Series.kurtosis
Series.log
Series.log10
Series.map_dict
Series.pct_change
Series.peak_max
Series.peak_min
Expand Down
2 changes: 1 addition & 1 deletion py-polars/polars/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,7 +1233,7 @@ def pandas_has_default_index(df: pd.DataFrame) -> bool:

index_cols = df.index.names

if len(index_cols) > 1: # noqa: SIM114
if len(index_cols) > 1:
return False # not default: more than one index
elif index_cols not in ([None], [""]):
return False # not default: index is named
Expand Down
15 changes: 7 additions & 8 deletions py-polars/polars/internals/expr/expr.py
Original file line number Diff line number Diff line change
Expand Up @@ -6047,9 +6047,9 @@ def map_dict(
Parameters
----------
remapping
Mapping dictionary to use for remapping the values.
Dictionary containing the before/after values to map.
default
Value to use when original value was not found in remapping dictionary.
Value to use when the remapping dict does not contain the lookup value.
Warnings
--------
Expand Down Expand Up @@ -6097,7 +6097,7 @@ def map_dict(
│ 3 ┆ DE ┆ Germany │
└────────┴──────────────┴───────────────┘
Set a default value for values that couldn't be mapped.
Set a default value for values that cannot be mapped...
>>> df.with_columns(
... pl.col("country_code")
Expand All @@ -6116,7 +6116,7 @@ def map_dict(
│ 3 ┆ DE ┆ Germany │
└────────┴──────────────┴───────────────┘
Keep the original value for values that couldn't be mapped.
...or keep the original value:
>>> df.with_columns(
... pl.col("country_code")
Expand All @@ -6135,10 +6135,9 @@ def map_dict(
│ 3 ┆ DE ┆ Germany │
└────────┴──────────────┴───────────────┘
If you need to access different columns to set a default value for values that
couldn't be mapped, a struct needs to be constructed, with in the first field
the column that you want to remap and the rest of the fields the other columns
you use in the default expression.
If you need to access other columns when setting a default value, a struct needs
to be constructed: its first field is the column that you want to remap, and the
remaining fields are the other columns used in the default expression.
>>> df.with_columns(
... pl.struct(pl.col(["country_code", "row_nr"])).map_dict(
Expand Down
61 changes: 56 additions & 5 deletions py-polars/polars/internals/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4780,6 +4780,57 @@ def clip_max(self, max_val: int | float) -> Series:
"""

def map_dict(
    self,
    remapping: dict[Any, Any],
    *,
    default: Any = None,
) -> Self:
    """
    Replace values in the Series using a remapping dictionary.

    Parameters
    ----------
    remapping
        Dictionary containing the before/after values to map.
    default
        Value to use when the remapping dict does not contain the lookup value.
        As the examples below show, this can be a single value or a Series
        (for instance the Series itself, to keep unmatched values as they are).

    Examples
    --------
    >>> s = pl.Series("iso3166", ["TUR", "???", "JPN", "NLD"])
    >>> country_lookup = {
    ...     "JPN": "Japan",
    ...     "TUR": "Türkiye",
    ...     "NLD": "Netherlands",
    ... }

    Remap, setting a default value for unrecognised values...

    >>> s.map_dict(country_lookup, default="Unspecified").rename("country_name")
    shape: (4,)
    Series: 'country_name' [str]
    [
        "Türkiye"
        "Unspecified"
        "Japan"
        "Netherlands"
    ]

    ...or keep the original value:

    >>> s.map_dict(country_lookup, default=s).rename("country_name")
    shape: (4,)
    Series: 'country_name' [str]
    [
        "Türkiye"
        "???"
        "Japan"
        "Netherlands"
    ]

    """
    # NOTE(review): no implementation is visible here beyond the docstring;
    # presumably the behaviour is supplied elsewhere (e.g. dispatched to the
    # expression-level ``map_dict``) -- confirm against the enclosing class.

def reshape(self, dims: tuple[int, ...]) -> Series:
"""
Reshape this Series to a flat Series or a Series of Lists.
Expand Down Expand Up @@ -5136,6 +5187,11 @@ def arr(self) -> ListNameSpace:
"""Create an object namespace of all list related methods."""
return ListNameSpace(self)

@property
def bin(self) -> BinaryNameSpace:
    """Return the namespace object that groups all binary related methods."""
    binary_namespace = BinaryNameSpace(self)
    return binary_namespace

@property
def cat(self) -> CatNameSpace:
"""Create an object namespace of all categorical related methods."""
Expand All @@ -5151,11 +5207,6 @@ def str(self) -> StringNameSpace:
"""Create an object namespace of all string related methods."""
return StringNameSpace(self)

@property
def bin(self) -> BinaryNameSpace:
"""Create an object namespace of all binary related methods."""
return BinaryNameSpace(self)

@property
def struct(self) -> StructNameSpace:
"""Create an object namespace of all struct related methods."""
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/test_exprs.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,7 +508,7 @@ def test_map_dict() -> None:

assert (
df.with_columns(
pl.struct(pl.col(["country_code", "row_nr"])) # type: ignore[union-attr]
pl.struct(pl.col(["country_code", "row_nr"]))
.map_dict(
country_code_dict,
default=pl.col("row_nr").cast(pl.Utf8),
Expand Down
14 changes: 14 additions & 0 deletions py-polars/tests/unit/test_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2392,3 +2392,17 @@ def test_is_between() -> None:
True,
False,
]


def test_map_dict() -> None:
    """Check ``Series.map_dict`` with a scalar default and with a Series default."""
    values = pl.Series("s", [-1, 2, None, 4, -5])
    lookup = {1: "one", 2: "two", 3: "three", 4: "four", 5: "five"}

    # Scalar default: after abs() every non-null value has a key in the lookup,
    # so only the null entry is expected to fall back to "?".
    remapped_abs = values.abs().map_dict(lookup, default="?")
    expected_abs = pl.Series("s", ["one", "two", "?", "four", "five"])
    assert_series_equal(remapped_abs, expected_abs)

    # Series default: entries without a matching key are expected to take the
    # corresponding element of the string-cast input (null stays null).
    fallback = values.cast(pl.Utf8)
    remapped_raw = values.map_dict(lookup, default=fallback)
    expected_raw = pl.Series("s", ["-1", "two", None, "four", "-5"])
    assert_series_equal(remapped_raw, expected_raw)

0 comments on commit cc64734

Please sign in to comment.