Skip to content

Commit

Permalink
Update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Jan 6, 2024
1 parent 5d8a5cc commit ae20037
Show file tree
Hide file tree
Showing 23 changed files with 384 additions and 369 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
datetime(2022, 12, 1), datetime(2022, 12, 1, 0, 0, 2), "1s", eager=True
),
}
).with_row_count("rn")
).with_row_number("rn")
print(df)
# --8<-- [end:selectors_df]

Expand Down
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Examples
self._df = df
def by_alternate_rows(self) -> list[pl.DataFrame]:
df = self._df.with_row_count(name="n")
df = self._df.with_row_number(name="n")
return [
df.filter((pl.col("n") % 2) == 0).drop("n"),
df.filter((pl.col("n") % 2) != 0).drop("n"),
Expand Down
22 changes: 13 additions & 9 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5227,15 +5227,15 @@ def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
... )
>>> df.with_row_number()
shape: (3, 3)
┌────────┬─────┬─────┐
┌────────────┬─────┬─────┐
│ row_number ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────────┴─────┴─────┘
"""
return self._from_pydf(self._df.with_row_number(name, offset))

Expand All @@ -5248,6 +5248,10 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
.. deprecated::
Use :meth:`with_row_number` instead.
Note that the default column name has changed from 'row_nr' to 'row_number'.
Parameters
----------
name
Expand All @@ -5263,7 +5267,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> df.with_row_number()
>>> df.with_row_count() # doctest: +SKIP
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand Down
544 changes: 272 additions & 272 deletions py-polars/polars/expr/expr.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions py-polars/polars/io/ipc/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def read_ipc(
tbl = pa.feather.read_table(data, memory_map=memory_map, columns=columns)
df = pl.DataFrame._from_arrow(tbl, rechunk=rechunk)
if row_count_name is not None:
df = df.with_row_count(row_count_name, row_count_offset)
df = df.with_row_number(row_count_name, row_count_offset)
if n_rows is not None:
df = df.slice(0, n_rows)
return df
Expand Down Expand Up @@ -169,7 +169,7 @@ def read_ipc_stream(
tbl = reader.read_all()
df = pl.DataFrame._from_arrow(tbl, rechunk=rechunk)
if row_count_name is not None:
df = df.with_row_count(row_count_name, row_count_offset)
df = df.with_row_number(row_count_name, row_count_offset)
if n_rows is not None:
df = df.slice(0, n_rows)
return df
Expand Down
24 changes: 14 additions & 10 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4590,15 +4590,15 @@ def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
... )
>>> lf.with_row_number().collect()
shape: (3, 3)
┌────────┬─────┬─────┐
row_nr ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
┌────────────┬─────┬─────┐
row_number ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────────┴─────┴─────┘
"""
return self._from_pyldf(self._ldf.with_row_number(name, offset))

Expand All @@ -4611,6 +4611,10 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
.. deprecated::
Use :meth:`with_row_number` instead.
Note that the default column name has changed from 'row_nr' to 'row_number'.
Parameters
----------
name
Expand All @@ -4631,7 +4635,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> lf.with_row_number().collect()
>>> lf.with_row_count().collect() # doctest: +SKIP
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4523,8 +4523,8 @@ def scatter(
It is better to implement this as follows:
>>> s.to_frame().with_row_count("row_nr").select(
... pl.when(pl.col("row_nr") == 1).then(10).otherwise(pl.col("a"))
>>> s.to_frame().with_row_number().select(
... pl.when(pl.col("row_number") == 1).then(10).otherwise(pl.col("a"))
... )
shape: (3, 1)
┌─────────┐
Expand Down
18 changes: 15 additions & 3 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1679,13 +1679,25 @@ def test_select_by_dtype(df: pl.DataFrame) -> None:
}


def test_with_row_count() -> None:
def test_with_row_number() -> None:
df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

out = df.with_row_count()
out = df.with_row_number()
assert out["row_number"].to_list() == [0, 1, 2]

out = df.lazy().with_row_number().collect()
assert out["row_number"].to_list() == [0, 1, 2]


def test_with_row_count_deprecated() -> None:
df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

with pytest.deprecated_call():
out = df.with_row_count()
assert out["row_nr"].to_list() == [0, 1, 2]

out = df.lazy().with_row_count().collect()
with pytest.deprecated_call():
out = df.lazy().with_row_count().collect()
assert out["row_nr"].to_list() == [0, 1, 2]


Expand Down
10 changes: 5 additions & 5 deletions py-polars/tests/unit/io/test_lazy_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_row_count(foods_file_path: Path) -> None:

df = (
pl.scan_csv(foods_file_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down Expand Up @@ -195,13 +195,13 @@ def test_lazy_n_rows(foods_file_path: Path) -> None:
def test_lazy_row_count_no_push_down(foods_file_path: Path) -> None:
plan = (
pl.scan_csv(foods_file_path)
.with_row_count()
.filter(pl.col("row_nr") == 1)
.with_row_number()
.filter(pl.col("row_number") == 1)
.filter(pl.col("category") == pl.lit("vegetables"))
.explain(predicate_pushdown=True)
)
# related to row count is not pushed.
assert 'FILTER [(col("row_nr")) == (1)] FROM' in plan
assert 'FILTER [(col("row_number")) == (1)] FROM' in plan
# unrelated to row count is pushed.
assert 'SELECTION: [(col("category")) == (String(vegetables))]' in plan

Expand Down Expand Up @@ -283,5 +283,5 @@ def test_scan_empty_csv_with_row_count(tmp_path: Path) -> None:
df = pl.DataFrame({"a": []})
df.write_csv(file_path)

read = pl.scan_csv(file_path).with_row_count("idx")
read = pl.scan_csv(file_path).with_row_number("idx")
assert read.collect().schema == OrderedDict([("idx", pl.UInt32), ("a", pl.String)])
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_lazy_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_row_count(foods_ipc_path: Path) -> None:

df = (
pl.scan_ipc(foods_ipc_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_lazy_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_scan_ndjson(foods_ndjson_path: Path) -> None:

df = (
pl.scan_ndjson(foods_ndjson_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/io/test_lazy_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_row_count(foods_parquet_path: Path) -> None:

df = (
pl.scan_parquet(foods_parquet_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down Expand Up @@ -407,7 +407,7 @@ def test_row_count_empty_file(tmp_path: Path) -> None:
file_path = tmp_path / "test.parquet"
df = pl.DataFrame({"a": []}, schema={"a": pl.Float32})
df.write_parquet(file_path)
result = pl.scan_parquet(file_path).with_row_count("idx").collect()
result = pl.scan_parquet(file_path).with_row_number("idx").collect()
assert result.schema == OrderedDict([("idx", pl.UInt32), ("a", pl.Float32)])


Expand Down
20 changes: 10 additions & 10 deletions py-polars/tests/unit/operations/rolling/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True).reverse(),
}
).with_columns(pl.col("col1").reverse().alias("row_nr"))
).with_columns(pl.col("col1").reverse().alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -314,11 +314,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True),
}
).with_columns(pl.col("col1").alias("row_nr"))
).with_columns(pl.col("col1").alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -352,11 +352,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True).shuffle(1),
}
).with_columns(pl.col("col1").sort().alias("row_nr"))
).with_columns(pl.col("col1").sort().alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -629,12 +629,12 @@ def test_rolling_aggregations_with_over_11225() -> None:
"date": [start + timedelta(days=k) for k in range(5)],
"group": ["A"] * 2 + ["B"] * 3,
}
).with_row_count()
).with_row_number()

df_temporal = df_temporal.sort("group", "date")

result = df_temporal.with_columns(
rolling_row_mean=pl.col("row_nr")
rolling_row_mean=pl.col("row_number")
.rolling_mean(
window_size="2d",
by="date",
Expand All @@ -645,12 +645,12 @@ def test_rolling_aggregations_with_over_11225() -> None:
)
expected = pl.DataFrame(
{
"row_nr": [0, 1, 2, 3, 4],
"row_number": [0, 1, 2, 3, 4],
"date": pl.datetime_range(date(2001, 1, 1), date(2001, 1, 5), eager=True),
"group": ["A", "A", "B", "B", "B"],
"rolling_row_mean": [None, 0.0, None, 2.0, 2.5],
},
schema_overrides={"row_nr": pl.UInt32},
schema_overrides={"row_number": pl.UInt32},
)
assert_frame_equal(result, expected)

Expand Down
18 changes: 9 additions & 9 deletions py-polars/tests/unit/operations/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def test_explode_empty_list_4003() -> None:


def test_explode_empty_list_4107() -> None:
df = pl.DataFrame({"b": [[1], [2], []] * 2}).with_row_count()
df = pl.DataFrame({"b": [[1], [2], []] * 2}).with_row_number()

assert_frame_equal(
df.explode(["b"]), df.explode(["b"]).drop("row_nr").with_row_count()
df.explode(["b"]), df.explode(["b"]).drop("row_number").with_row_number()
)


Expand All @@ -112,15 +112,15 @@ def test_explode_correct_for_slice() -> None:
)
)
.sort("group")
.with_row_count()
.with_row_number()
)
expected = pl.DataFrame(
{
"row_nr": [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9],
"row_number": [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9],
"group": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
"b": [1, 2, 3, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 1, 2, 3, 0],
},
schema_overrides={"row_nr": pl.UInt32},
schema_overrides={"row_number": pl.UInt32},
)
assert_frame_equal(df.slice(0, 10).explode(["b"]), expected)

Expand Down Expand Up @@ -215,12 +215,12 @@ def test_explode_in_agg_context() -> None:
)

assert (
df.with_row_count("row_nr")
df.with_row_number()
.explode("idxs")
.group_by("row_nr")
.group_by("row_number")
.agg(pl.col("array").flatten())
).to_dict(as_series=False) == {
"row_nr": [0, 1, 2],
"row_number": [0, 1, 2],
"array": [[0.0, 3.5], [4.6, 0.0], [0.0, 7.8, 0.0, 0.0, 7.8, 0.0]],
}

Expand Down Expand Up @@ -281,7 +281,7 @@ def test_explode_invalid_element_count() -> None:
"col1": [["X", "Y", "Z"], ["F", "G"], ["P"]],
"col2": [["A", "B", "C"], ["C"], ["D", "E"]],
}
).with_row_count()
).with_row_number()
with pytest.raises(
pl.ShapeError, match=r"exploded columns must have matching element counts"
):
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/operations/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,17 +277,17 @@ def test_arg_sort_sort_by_groups_update__4360() -> None:


def test_unique_order() -> None:
df = pl.DataFrame({"a": [1, 2, 1]}).with_row_count()
df = pl.DataFrame({"a": [1, 2, 1]}).with_row_number()
assert df.unique(keep="last", subset="a", maintain_order=True).to_dict(
as_series=False
) == {
"row_nr": [1, 2],
"row_number": [1, 2],
"a": [2, 1],
}
assert df.unique(keep="first", subset="a", maintain_order=True).to_dict(
as_series=False
) == {
"row_nr": [0, 1],
"row_number": [0, 1],
"a": [1, 2],
}

Expand Down
Loading

0 comments on commit ae20037

Please sign in to comment.