Skip to content

Commit

Permalink
Improve docstrings and handle bad offsets
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Jan 8, 2024
1 parent c83565e commit f4383c2
Show file tree
Hide file tree
Showing 3 changed files with 72 additions and 8 deletions.
29 changes: 25 additions & 4 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5207,14 +5207,19 @@ def pipe(

def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
Add a row index as the first column in the DataFrame.
Parameters
----------
name
Name of the column to add.
Name of the index column.
offset
Start the row count at this offset. Default = 0
Start the index at this offset. Cannot be negative.
Notes
-----
The resulting column does not have any special properties. It is a regular
column of type `UInt32` (or `UInt64` in `polars-u64-idx`).
Examples
--------
Expand All @@ -5235,8 +5240,24 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└───────┴─────┴─────┘
>>> df.with_row_index("id", offset=1000)
shape: (3, 3)
┌──────┬─────┬─────┐
│ id ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ 1000 ┆ 1 ┆ 2 │
│ 1001 ┆ 3 ┆ 4 │
│ 1002 ┆ 5 ┆ 6 │
└──────┴─────┴─────┘
"""
return self._from_pydf(self._df.with_row_index(name, offset))
try:
return self._from_pydf(self._df.with_row_index(name, offset))
except OverflowError:
issue = "negative" if offset < 0 else "greater than the maximum index value"
msg = f"`offset` input for `with_row_index` cannot be {issue}, got {offset}"
raise ValueError(msg) from None

@deprecate_function(
"Use `with_row_index` instead."
Expand Down
29 changes: 25 additions & 4 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4565,20 +4565,25 @@ def approx_n_unique(self) -> Self:

def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
"""
Add a column at index 0 with the row number.
Add a row index as the first column in the LazyFrame.
Parameters
----------
name
Name of the column to add.
Name of the index column.
offset
Start the row count at this offset.
Start the index at this offset. Cannot be negative.
Warnings
--------
Using this function can have a negative effect on query performance.
This may, for instance, block predicate pushdown optimization.
Notes
-----
The resulting column does not have any special properties. It is a regular
column of type `UInt32` (or `UInt64` in `polars-u64-idx`).
Examples
--------
>>> lf = pl.LazyFrame(
Expand All @@ -4598,8 +4603,24 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└───────┴─────┴─────┘
>>> lf.with_row_index("id", offset=1000).collect()
shape: (3, 3)
┌──────┬─────┬─────┐
│ id ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞══════╪═════╪═════╡
│ 1000 ┆ 1 ┆ 2 │
│ 1001 ┆ 3 ┆ 4 │
│ 1002 ┆ 5 ┆ 6 │
└──────┴─────┴─────┘
"""
return self._from_pyldf(self._ldf.with_row_index(name, offset))
try:
return self._from_pyldf(self._ldf.with_row_index(name, offset))
except OverflowError:
issue = "negative" if offset < 0 else "greater than the maximum index value"
msg = f"`offset` input for `with_row_index` cannot be {issue}, got {offset}"
raise ValueError(msg) from None

@deprecate_function(
"Use `with_row_index` instead."
Expand Down
22 changes: 22 additions & 0 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1689,6 +1689,28 @@ def test_with_row_index() -> None:
assert out["index"].to_list() == [0, 1, 2]


def test_with_row_index_bad_offset() -> None:
    """Check that `DataFrame.with_row_index` rejects out-of-range offsets.

    Both a negative offset and an offset above the maximum index value must
    raise a `ValueError` with a descriptive message.
    """
    df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

    with pytest.raises(ValueError, match="cannot be negative"):
        df.with_row_index(offset=-1)
    # Use 2**64 rather than 2**32: it overflows the default UInt32 index as well
    # as the UInt64 index of the `polars-u64-idx` build, where 2**32 would be a
    # perfectly valid offset and no error would be raised.
    with pytest.raises(
        ValueError, match="cannot be greater than the maximum index value"
    ):
        df.with_row_index(offset=2**64)


def test_with_row_index_bad_offset_lazy() -> None:
    """Check that `LazyFrame.with_row_index` rejects out-of-range offsets.

    Both a negative offset and an offset above the maximum index value must
    raise a `ValueError` with a descriptive message.
    """
    lf = pl.LazyFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

    with pytest.raises(ValueError, match="cannot be negative"):
        lf.with_row_index(offset=-1)
    # Use 2**64 rather than 2**32: it overflows the default UInt32 index as well
    # as the UInt64 index of the `polars-u64-idx` build, where 2**32 would be a
    # perfectly valid offset and no error would be raised.
    with pytest.raises(
        ValueError, match="cannot be greater than the maximum index value"
    ):
        lf.with_row_index(offset=2**64)


def test_with_row_count_deprecated() -> None:
df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

Expand Down

0 comments on commit f4383c2

Please sign in to comment.