Skip to content

Commit

Permalink
Update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
stinodego committed Jan 6, 2024
1 parent 5d8a5cc commit ae20037
Show file tree
Hide file tree
Showing 23 changed files with 384 additions and 369 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
datetime(2022, 12, 1), datetime(2022, 12, 1, 0, 0, 2), "1s", eager=True
),
}
).with_row_count("rn")
).with_row_number("rn")
print(df)
# --8<-- [end:selectors_df]

Expand Down
2 changes: 1 addition & 1 deletion py-polars/docs/source/reference/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ Examples
self._df = df
def by_alternate_rows(self) -> list[pl.DataFrame]:
df = self._df.with_row_count(name="n")
df = self._df.with_row_number(name="n")
return [
df.filter((pl.col("n") % 2) == 0).drop("n"),
df.filter((pl.col("n") % 2) != 0).drop("n"),
Expand Down
22 changes: 13 additions & 9 deletions py-polars/polars/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -5227,15 +5227,15 @@ def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
... )
>>> df.with_row_number()
shape: (3, 3)
┌────────┬─────┬─────┐
┌────────────┬─────┬─────┐
│ row_number ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────────┴─────┴─────┘
"""
return self._from_pydf(self._df.with_row_number(name, offset))

Expand All @@ -5248,6 +5248,10 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
.. deprecated::
Use :meth:`with_row_number` instead.
Note that the default column name has changed from 'row_nr' to 'row_number'.
Parameters
----------
name
Expand All @@ -5263,7 +5267,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> df.with_row_number()
>>> df.with_row_count() # doctest: +SKIP
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand Down
544 changes: 272 additions & 272 deletions py-polars/polars/expr/expr.py

Large diffs are not rendered by default.

4 changes: 2 additions & 2 deletions py-polars/polars/io/ipc/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def read_ipc(
tbl = pa.feather.read_table(data, memory_map=memory_map, columns=columns)
df = pl.DataFrame._from_arrow(tbl, rechunk=rechunk)
if row_count_name is not None:
df = df.with_row_count(row_count_name, row_count_offset)
df = df.with_row_number(row_count_name, row_count_offset)
if n_rows is not None:
df = df.slice(0, n_rows)
return df
Expand Down Expand Up @@ -169,7 +169,7 @@ def read_ipc_stream(
tbl = reader.read_all()
df = pl.DataFrame._from_arrow(tbl, rechunk=rechunk)
if row_count_name is not None:
df = df.with_row_count(row_count_name, row_count_offset)
df = df.with_row_number(row_count_name, row_count_offset)
if n_rows is not None:
df = df.slice(0, n_rows)
return df
Expand Down
24 changes: 14 additions & 10 deletions py-polars/polars/lazyframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -4590,15 +4590,15 @@ def with_row_number(self, name: str = "row_number", offset: int = 0) -> Self:
... )
>>> lf.with_row_number().collect()
shape: (3, 3)
┌────────┬─────┬─────┐
row_nr ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────┴─────┴─────┘
┌────────────┬─────┬─────┐
row_number ┆ a ┆ b │
│ --- ┆ --- ┆ --- │
│ u32 ┆ i64 ┆ i64 │
╞════════════╪═════╪═════╡
│ 0 ┆ 1 ┆ 2 │
│ 1 ┆ 3 ┆ 4 │
│ 2 ┆ 5 ┆ 6 │
└────────────┴─────┴─────┘
"""
return self._from_pyldf(self._ldf.with_row_number(name, offset))

Expand All @@ -4611,6 +4611,10 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
"""
Add a column at index 0 that counts the rows.
.. deprecated::
Use :meth:`with_row_number` instead.
Note that the default column name has changed from 'row_nr' to 'row_number'.
Parameters
----------
name
Expand All @@ -4631,7 +4635,7 @@ def with_row_count(self, name: str = "row_nr", offset: int = 0) -> Self:
... "b": [2, 4, 6],
... }
... )
>>> lf.with_row_number().collect()
>>> lf.with_row_count().collect() # doctest: +SKIP
shape: (3, 3)
┌────────┬─────┬─────┐
│ row_nr ┆ a ┆ b │
Expand Down
4 changes: 2 additions & 2 deletions py-polars/polars/series/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4523,8 +4523,8 @@ def scatter(
It is better to implement this as follows:
>>> s.to_frame().with_row_count("row_nr").select(
... pl.when(pl.col("row_nr") == 1).then(10).otherwise(pl.col("a"))
>>> s.to_frame().with_row_number().select(
... pl.when(pl.col("row_number") == 1).then(10).otherwise(pl.col("a"))
... )
shape: (3, 1)
┌─────────┐
Expand Down
18 changes: 15 additions & 3 deletions py-polars/tests/unit/dataframe/test_df.py
Original file line number Diff line number Diff line change
Expand Up @@ -1679,13 +1679,25 @@ def test_select_by_dtype(df: pl.DataFrame) -> None:
}


def test_with_row_count() -> None:
def test_with_row_number() -> None:
df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

out = df.with_row_count()
out = df.with_row_number()
assert out["row_number"].to_list() == [0, 1, 2]

out = df.lazy().with_row_number().collect()
assert out["row_number"].to_list() == [0, 1, 2]


def test_with_row_count_deprecated() -> None:
df = pl.DataFrame({"a": [1, 1, 3], "b": [1.0, 2.0, 2.0]})

with pytest.deprecated_call():
out = df.with_row_count()
assert out["row_nr"].to_list() == [0, 1, 2]

out = df.lazy().with_row_count().collect()
with pytest.deprecated_call():
out = df.lazy().with_row_count().collect()
assert out["row_nr"].to_list() == [0, 1, 2]


Expand Down
10 changes: 5 additions & 5 deletions py-polars/tests/unit/io/test_lazy_csv.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_row_count(foods_file_path: Path) -> None:

df = (
pl.scan_csv(foods_file_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down Expand Up @@ -195,13 +195,13 @@ def test_lazy_n_rows(foods_file_path: Path) -> None:
def test_lazy_row_count_no_push_down(foods_file_path: Path) -> None:
plan = (
pl.scan_csv(foods_file_path)
.with_row_count()
.filter(pl.col("row_nr") == 1)
.with_row_number()
.filter(pl.col("row_number") == 1)
.filter(pl.col("category") == pl.lit("vegetables"))
.explain(predicate_pushdown=True)
)
# related to row count is not pushed.
assert 'FILTER [(col("row_nr")) == (1)] FROM' in plan
assert 'FILTER [(col("row_number")) == (1)] FROM' in plan
# unrelated to row count is pushed.
assert 'SELECTION: [(col("category")) == (String(vegetables))]' in plan

Expand Down Expand Up @@ -283,5 +283,5 @@ def test_scan_empty_csv_with_row_count(tmp_path: Path) -> None:
df = pl.DataFrame({"a": []})
df.write_csv(file_path)

read = pl.scan_csv(file_path).with_row_count("idx")
read = pl.scan_csv(file_path).with_row_number("idx")
assert read.collect().schema == OrderedDict([("idx", pl.UInt32), ("a", pl.String)])
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_lazy_ipc.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def test_row_count(foods_ipc_path: Path) -> None:

df = (
pl.scan_ipc(foods_ipc_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down
2 changes: 1 addition & 1 deletion py-polars/tests/unit/io/test_lazy_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_scan_ndjson(foods_ndjson_path: Path) -> None:

df = (
pl.scan_ndjson(foods_ndjson_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down
4 changes: 2 additions & 2 deletions py-polars/tests/unit/io/test_lazy_parquet.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_row_count(foods_parquet_path: Path) -> None:

df = (
pl.scan_parquet(foods_parquet_path, row_count_name="row_count")
.with_row_count("foo", 10)
.with_row_number("foo", 10)
.filter(pl.col("category") == pl.lit("vegetables"))
.collect()
)
Expand Down Expand Up @@ -407,7 +407,7 @@ def test_row_count_empty_file(tmp_path: Path) -> None:
file_path = tmp_path / "test.parquet"
df = pl.DataFrame({"a": []}, schema={"a": pl.Float32})
df.write_parquet(file_path)
result = pl.scan_parquet(file_path).with_row_count("idx").collect()
result = pl.scan_parquet(file_path).with_row_number("idx").collect()
assert result.schema == OrderedDict([("idx", pl.UInt32), ("a", pl.Float32)])


Expand Down
20 changes: 10 additions & 10 deletions py-polars/tests/unit/operations/rolling/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,11 +275,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True).reverse(),
}
).with_columns(pl.col("col1").reverse().alias("row_nr"))
).with_columns(pl.col("col1").reverse().alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -314,11 +314,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True),
}
).with_columns(pl.col("col1").alias("row_nr"))
).with_columns(pl.col("col1").alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -352,11 +352,11 @@ def test_rolling_group_by_extrema() -> None:
{
"col1": pl.arange(0, 7, eager=True).shuffle(1),
}
).with_columns(pl.col("col1").sort().alias("row_nr"))
).with_columns(pl.col("col1").sort().alias("row_number"))

assert (
df.rolling(
index_column="row_nr",
index_column="row_number",
period="3i",
)
.agg(
Expand Down Expand Up @@ -629,12 +629,12 @@ def test_rolling_aggregations_with_over_11225() -> None:
"date": [start + timedelta(days=k) for k in range(5)],
"group": ["A"] * 2 + ["B"] * 3,
}
).with_row_count()
).with_row_number()

df_temporal = df_temporal.sort("group", "date")

result = df_temporal.with_columns(
rolling_row_mean=pl.col("row_nr")
rolling_row_mean=pl.col("row_number")
.rolling_mean(
window_size="2d",
by="date",
Expand All @@ -645,12 +645,12 @@ def test_rolling_aggregations_with_over_11225() -> None:
)
expected = pl.DataFrame(
{
"row_nr": [0, 1, 2, 3, 4],
"row_number": [0, 1, 2, 3, 4],
"date": pl.datetime_range(date(2001, 1, 1), date(2001, 1, 5), eager=True),
"group": ["A", "A", "B", "B", "B"],
"rolling_row_mean": [None, 0.0, None, 2.0, 2.5],
},
schema_overrides={"row_nr": pl.UInt32},
schema_overrides={"row_number": pl.UInt32},
)
assert_frame_equal(result, expected)

Expand Down
18 changes: 9 additions & 9 deletions py-polars/tests/unit/operations/test_explode.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,10 +89,10 @@ def test_explode_empty_list_4003() -> None:


def test_explode_empty_list_4107() -> None:
df = pl.DataFrame({"b": [[1], [2], []] * 2}).with_row_count()
df = pl.DataFrame({"b": [[1], [2], []] * 2}).with_row_number()

assert_frame_equal(
df.explode(["b"]), df.explode(["b"]).drop("row_nr").with_row_count()
df.explode(["b"]), df.explode(["b"]).drop("row_number").with_row_number()
)


Expand All @@ -112,15 +112,15 @@ def test_explode_correct_for_slice() -> None:
)
)
.sort("group")
.with_row_count()
.with_row_number()
)
expected = pl.DataFrame(
{
"row_nr": [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9],
"row_number": [0, 0, 0, 1, 1, 2, 3, 3, 3, 4, 5, 5, 5, 6, 6, 7, 8, 8, 8, 9],
"group": [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
"b": [1, 2, 3, 2, 3, 4, 1, 2, 3, 0, 1, 2, 3, 2, 3, 4, 1, 2, 3, 0],
},
schema_overrides={"row_nr": pl.UInt32},
schema_overrides={"row_number": pl.UInt32},
)
assert_frame_equal(df.slice(0, 10).explode(["b"]), expected)

Expand Down Expand Up @@ -215,12 +215,12 @@ def test_explode_in_agg_context() -> None:
)

assert (
df.with_row_count("row_nr")
df.with_row_number()
.explode("idxs")
.group_by("row_nr")
.group_by("row_number")
.agg(pl.col("array").flatten())
).to_dict(as_series=False) == {
"row_nr": [0, 1, 2],
"row_number": [0, 1, 2],
"array": [[0.0, 3.5], [4.6, 0.0], [0.0, 7.8, 0.0, 0.0, 7.8, 0.0]],
}

Expand Down Expand Up @@ -281,7 +281,7 @@ def test_explode_invalid_element_count() -> None:
"col1": [["X", "Y", "Z"], ["F", "G"], ["P"]],
"col2": [["A", "B", "C"], ["C"], ["D", "E"]],
}
).with_row_count()
).with_row_number()
with pytest.raises(
pl.ShapeError, match=r"exploded columns must have matching element counts"
):
Expand Down
6 changes: 3 additions & 3 deletions py-polars/tests/unit/operations/test_group_by.py
Original file line number Diff line number Diff line change
Expand Up @@ -277,17 +277,17 @@ def test_arg_sort_sort_by_groups_update__4360() -> None:


def test_unique_order() -> None:
df = pl.DataFrame({"a": [1, 2, 1]}).with_row_count()
df = pl.DataFrame({"a": [1, 2, 1]}).with_row_number()
assert df.unique(keep="last", subset="a", maintain_order=True).to_dict(
as_series=False
) == {
"row_nr": [1, 2],
"row_number": [1, 2],
"a": [2, 1],
}
assert df.unique(keep="first", subset="a", maintain_order=True).to_dict(
as_series=False
) == {
"row_nr": [0, 1],
"row_number": [0, 1],
"a": [1, 2],
}

Expand Down
Loading

0 comments on commit ae20037

Please sign in to comment.