Update all references
stinodego committed Jan 14, 2024
1 parent e0b33a4 commit 4ce7a39
Showing 37 changed files with 195 additions and 151 deletions.
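
For context, the pattern applied throughout this commit is the rename of the count expression to `pl.len()`, with `pl.count()` and `GroupBy.count()` deprecated in 0.20.5. A minimal before/after sketch, using a toy frame that is not part of the diff:

import polars as pl

df = pl.DataFrame({"x": ["b", "a", "b"], "y": [1, 2, 3]})

# New spelling: pl.len() produces a UInt32 column named "len"
df.group_by("x", maintain_order=True).agg(pl.len())

# Old spelling, deprecated in 0.20.5 in favor of pl.len()
df.group_by("x", maintain_order=True).agg(pl.count())
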
2 changes: 1 addition & 1 deletion docs/src/python/user-guide/basics/expressions.py
@@ -63,7 +63,7 @@
# --8<-- [end:dataframe2]

# --8<-- [start:group_by]
df2.group_by("y", maintain_order=True).count()
df2.group_by("y", maintain_order=True).len()
# --8<-- [end:group_by]

# --8<-- [start:group_by2]
4 changes: 2 additions & 2 deletions docs/src/python/user-guide/expressions/aggregation.py
@@ -24,11 +24,11 @@
dataset.lazy()
.group_by("first_name")
.agg(
pl.count(),
pl.len(),
pl.col("gender"),
pl.first("last_name"),
)
.sort("count", descending=True)
.sort("len", descending=True)
.limit(5)
)

@@ -43,7 +43,7 @@ def add_counter(val: int) -> int:

out = df.select(
pl.col("values").map_elements(add_counter).alias("solution_map_elements"),
(pl.col("values") + pl.int_range(1, pl.count() + 1)).alias("solution_expr"),
(pl.col("values") + pl.int_range(1, pl.len() + 1)).alias("solution_expr"),
)
print(out)
# --8<-- [end:counter]
5 changes: 3 additions & 2 deletions docs/src/python/user-guide/io/multiple.py
@@ -28,12 +28,13 @@
# --8<-- [end:graph]

# --8<-- [start:glob]
import polars as pl
import glob

import polars as pl

queries = []
for file in glob.glob("docs/data/my_many_files_*.csv"):
q = pl.scan_csv(file).group_by("bar").agg([pl.count(), pl.sum("foo")])
q = pl.scan_csv(file).group_by("bar").agg(pl.len(), pl.sum("foo"))
queries.append(q)

dataframes = pl.collect_all(queries)
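
Besides the `count` → `len` rename, this hunk also drops the list wrapper around the aggregations: `agg` accepts expressions positionally, and the older list form remains valid. A minimal sketch with a toy LazyFrame (not part of the diff):

import polars as pl

lf = pl.LazyFrame({"bar": ["a", "a", "b"], "foo": [1, 2, 3]})

# Positional expressions ...
lf.group_by("bar").agg(pl.len(), pl.sum("foo"))

# ... are equivalent to the list form that the diff removes
lf.group_by("bar").agg([pl.len(), pl.sum("foo")])
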
@@ -1,7 +1,8 @@
# --8<-- [start:setup]
import polars as pl
from datetime import date, datetime

import polars as pl

# --8<-- [end:setup]

# --8<-- [start:df]
@@ -60,10 +61,6 @@
closed="both",
by="groups",
include_boundaries=True,
).agg(
[
pl.count(),
]
)
).agg(pl.len())
print(out)
# --8<-- [end:group_by_dyn2]
16 changes: 11 additions & 5 deletions py-polars/polars/dataframe/frame.py
@@ -5272,10 +5272,10 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
└──────┴─────┴─────┘
An index column can also be created using the expressions :func:`int_range`
and :func:`count`.
and :func:`len`.
>>> df.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... )
shape: (3, 3)
@@ -7260,9 +7260,8 @@ def pivot(
- None: no aggregation takes place, will raise error if multiple values are in group.
- A predefined aggregate function string, one of
{'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count'}
{'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'}
- An expression to do the aggregation.
maintain_order
Sort the grouped keys so that the output order is predictable.
sort_columns
@@ -7392,8 +7391,15 @@ def pivot(
aggregate_expr = F.element().median()._pyexpr
elif aggregate_function == "last":
aggregate_expr = F.element().last()._pyexpr
elif aggregate_function == "len":
aggregate_expr = F.len()._pyexpr
elif aggregate_function == "count":
aggregate_expr = F.count()._pyexpr
issue_deprecation_warning(
"`aggregate_function='count'` input for `pivot` is deprecated."
" Please use `aggregate_function='len'`.",
version="0.20.5",
)
aggregate_expr = F.len()._pyexpr
else:
msg = f"invalid input for `aggregate_function` argument: {aggregate_function!r}"
raise ValueError(msg)
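
A sketch of how the two `aggregate_function` branches above behave from the caller's side, assuming the 0.20-era `pivot` signature with `values`/`index`/`columns` parameters and toy column names:

import polars as pl

df = pl.DataFrame(
    {
        "foo": ["one", "one", "two", "two"],
        "bar": ["A", "B", "A", "A"],
        "baz": [1, 2, 3, 4],
    }
)

# New string input: each cell holds the group size
df.pivot(values="baz", index="foo", columns="bar", aggregate_function="len")

# Deprecated since 0.20.5: still dispatches to F.len() but emits a warning
df.pivot(values="baz", index="foo", columns="bar", aggregate_function="count")
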
30 changes: 28 additions & 2 deletions py-polars/polars/dataframe/group_by.py
@@ -305,7 +305,7 @@ def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
It is better to implement this with an expression:
>>> df.filter(
... pl.int_range(0, pl.count()).shuffle().over("color") < 2
... pl.int_range(pl.len()).shuffle().over("color") < 2
... ) # doctest: +IGNORE_RESULT
"""
by: list[str]
@@ -452,6 +452,32 @@ def all(self) -> DataFrame:
"""
return self.agg(F.all())

def len(self) -> DataFrame:
"""
Return the number of rows in each group.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": ["apple", "apple", "orange"],
... "b": [1, None, 2],
... }
... )
>>> df.group_by("a").len() # doctest: +SKIP
shape: (2, 2)
┌────────┬─────┐
│ a ┆ len │
│ --- ┆ --- │
│ str ┆ u32 │
╞════════╪═════╡
│ apple ┆ 2 │
│ orange ┆ 1 │
└────────┴─────┘
"""
return self.agg(F.len())

@deprecate_renamed_function("len", version="0.20.5")
def count(self) -> DataFrame:
"""
Return the number of rows in each group.
@@ -477,7 +503,7 @@ def count(self) -> DataFrame:
│ orange ┆ 1 │
└────────┴───────┘
"""
return self.agg(F.count())
return self.agg(F.len().alias("count"))

def first(self) -> DataFrame:
"""
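
The deprecated `count` path above keeps the old output column name by aliasing `F.len()`, so existing callers only see a deprecation warning rather than a schema change. A minimal sketch reusing the docstring's toy frame:

import polars as pl

df = pl.DataFrame({"a": ["apple", "apple", "orange"], "b": [1, None, 2]})

# Deprecated spelling: warns, but the result still has a "count" column
old = df.group_by("a").count()

# New spelling: the column is named "len" instead
new = df.group_by("a").len()

print(old.columns)  # ['a', 'count']
print(new.columns)  # ['a', 'len']
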
8 changes: 4 additions & 4 deletions py-polars/polars/expr/meta.py
@@ -130,11 +130,11 @@ def output_name(self, *, raise_if_undetermined: bool = True) -> str | None:
>>> e_sum_over = pl.sum("foo").over("groups")
>>> e_sum_over.meta.output_name()
'foo'
>>> e_sum_slice = pl.sum("foo").slice(pl.count() - 10, pl.col("bar"))
>>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
>>> e_sum_slice.meta.output_name()
'foo'
>>> pl.count().meta.output_name()
'count'
>>> pl.len().meta.output_name()
'len'
"""
try:
return self._pyexpr.meta_output_name()
@@ -180,7 +180,7 @@ def root_names(self) -> list[str]:
>>> e_sum_over = pl.sum("foo").over("groups")
>>> e_sum_over.meta.root_names()
['foo', 'groups']
>>> e_sum_slice = pl.sum("foo").slice(pl.count() - 10, pl.col("bar"))
>>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
>>> e_sum_slice.meta.root_names()
['foo', 'bar']
"""
4 changes: 2 additions & 2 deletions py-polars/polars/functions/range/int_range.py
@@ -198,11 +198,11 @@ def int_range(
2
]
Generate an index column using `int_range` in conjunction with :func:`count`.
Generate an index column by using `int_range` in conjunction with :func:`len`.
>>> df = pl.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})
>>> df.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... )
shape: (3, 3)
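
The same `int_range`/`len` combination also works per group via a window, a common alternative to `with_row_index` when the index should restart for each group. A minimal sketch with an illustrative frame that is not part of the diff:

import polars as pl

df = pl.DataFrame({"g": ["a", "a", "b"], "v": [10, 20, 30]})

# 0-based position of each row within its "g" group
df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).over("g").alias("index_in_group")
)
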
4 changes: 2 additions & 2 deletions py-polars/polars/lazyframe/frame.py
@@ -4616,10 +4616,10 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
└──────┴─────┴─────┘
An index column can also be created using the expressions :func:`int_range`
and :func:`count`.
and :func:`len`.
>>> lf.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... ).collect()
shape: (3, 3)
38 changes: 32 additions & 6 deletions py-polars/polars/lazyframe/group_by.py
@@ -208,11 +208,9 @@ def map_groups(
It is better to implement this with an expression:
>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
>>> df.lazy().filter(
... pl.int_range(pl.len()).shuffle().over("color") < 2
... ).collect() # doctest: +IGNORE_RESULT
"""
return wrap_ldf(self.lgb.map_groups(function, schema))

@@ -335,6 +333,34 @@ def all(self) -> LazyFrame:
"""
return self.agg(F.all())

def len(self) -> LazyFrame:
"""
Return the number of rows in each group.
Rows containing null values count towards the total.
Examples
--------
>>> lf = pl.LazyFrame(
... {
... "a": ["apple", "apple", "orange"],
... "b": [1, None, 2],
... }
... )
>>> lf.group_by("a").len().collect() # doctest: +SKIP
shape: (2, 2)
┌────────┬─────┐
│ a ┆ len │
│ --- ┆ --- │
│ str ┆ u32 │
╞════════╪═════╡
│ apple ┆ 2 │
│ orange ┆ 1 │
└────────┴─────┘
"""
return self.agg(F.len())

@deprecate_renamed_function("len", version="0.20.5")
def count(self) -> LazyFrame:
"""
Return the number of rows in each group.
@@ -360,7 +386,7 @@ def count(self) -> LazyFrame:
│ orange ┆ 1 │
└────────┴───────┘
"""
return self.agg(F.count())
return self.agg(F.len().alias("count"))

def first(self) -> LazyFrame:
"""
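
As with the eager API, the new `LazyGroupBy.len` only extends the query plan; nothing is computed until `collect`. A minimal sketch using the docstring's toy data:

import polars as pl

lf = pl.LazyFrame({"a": ["apple", "apple", "orange"], "b": [1, None, 2]})

plan = lf.group_by("a").len()  # lazy: only builds the plan
result = plan.collect()        # execution happens here
print(result.columns)  # ['a', 'len']
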
2 changes: 1 addition & 1 deletion py-polars/polars/type_aliases.py
@@ -100,7 +100,7 @@
"lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"
]
PivotAgg: TypeAlias = Literal[
"first", "sum", "max", "min", "mean", "median", "last", "count"
"min", "max", "first", "last", "sum", "mean", "median", "len"
]
RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal", "random"]
SizeUnit: TypeAlias = Literal[
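
The `PivotAgg` literal is what static type checkers see for the `aggregate_function` string argument, so after this change "len" is accepted while the removed "count" literal is flagged. A sketch of a typed helper, assuming the 0.20-era module path `polars.type_aliases` (semi-internal) and hypothetical column names:

import polars as pl
from polars.type_aliases import PivotAgg  # semi-internal typing module as of 0.20.x


def pivot_summary(df: pl.DataFrame, agg: PivotAgg = "len") -> pl.DataFrame:
    # "len" type-checks; passing "count" would be rejected by a checker.
    return df.pivot(values="baz", index="foo", columns="bar", aggregate_function=agg)
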
6 changes: 3 additions & 3 deletions py-polars/tests/unit/dataframe/test_df.py
@@ -1774,9 +1774,9 @@ def __repr__(self) -> str:
def test_group_by_order_dispatch() -> None:
df = pl.DataFrame({"x": list("bab"), "y": range(3)})

result = df.group_by("x", maintain_order=True).count()
result = df.group_by("x", maintain_order=True).len()
expected = pl.DataFrame(
{"x": ["b", "a"], "count": [2, 1]}, schema_overrides={"count": pl.UInt32}
{"x": ["b", "a"], "len": [2, 1]}, schema_overrides={"len": pl.UInt32}
)
assert_frame_equal(result, expected)

@@ -2409,7 +2409,7 @@ def test_group_by_slice_expression_args() -> None:

out = (
df.group_by("groups", maintain_order=True)
.agg([pl.col("vals").slice(pl.count() * 0.1, (pl.count() // 5))])
.agg([pl.col("vals").slice(pl.len() * 0.1, (pl.len() // 5))])
.explode("vals")
)

2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_categorical.py
@@ -124,7 +124,7 @@ def test_unset_sorted_on_append() -> None:
]
).sort("key")
df = pl.concat([df1, df2], rechunk=False)
assert df.group_by("key").count()["count"].to_list() == [4, 4]
assert df.group_by("key").len()["len"].to_list() == [4, 4]


@pytest.mark.parametrize(
36 changes: 12 additions & 24 deletions py-polars/tests/unit/datatypes/test_temporal.py
@@ -1310,13 +1310,13 @@ def test_rolling_by_() -> None:
out = (
df.sort("datetime")
.rolling(index_column="datetime", by="group", period=timedelta(days=3))
.agg([pl.count().alias("count")])
.agg([pl.len().alias("count")])
)

expected = (
df.sort(["group", "datetime"])
.rolling(index_column="datetime", by="group", period="3d")
.agg([pl.count().alias("count")])
.agg([pl.len().alias("count")])
)
assert_frame_equal(out.sort(["group", "datetime"]), expected)
assert out.to_dict(as_series=False) == {
@@ -2573,30 +2573,18 @@ def test_datetime_cum_agg_schema() -> None:


def test_rolling_group_by_empty_groups_by_take_6330() -> None:
df = (
pl.DataFrame({"Event": ["Rain", "Sun"]})
.join(
pl.DataFrame(
{
"Date": [1, 2, 3, 4],
}
),
how="cross",
)
.set_sorted("Date")
)
assert (
df.rolling(
index_column="Date",
period="2i",
offset="-2i",
by="Event",
closed="left",
).agg([pl.count()])
).to_dict(as_series=False) == {
df1 = pl.DataFrame({"Event": ["Rain", "Sun"]})
df2 = pl.DataFrame({"Date": [1, 2, 3, 4]})
df = df1.join(df2, how="cross").set_sorted("Date")

result = df.rolling(
index_column="Date", period="2i", offset="-2i", by="Event", closed="left"
).agg(pl.len())

assert result.to_dict(as_series=False) == {
"Event": ["Rain", "Rain", "Rain", "Rain", "Sun", "Sun", "Sun", "Sun"],
"Date": [1, 2, 3, 4, 1, 2, 3, 4],
"count": [0, 1, 2, 2, 0, 1, 2, 2],
"len": [0, 1, 2, 2, 0, 1, 2, 2],
}


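
Where downstream code still expects the old column name, the tests above keep it by aliasing the new expression (`pl.len().alias("count")`). A minimal sketch with a toy rolling group-by that is not taken from the test file:

from datetime import datetime, timedelta

import polars as pl

df = pl.DataFrame(
    {
        "group": ["a", "a", "b"],
        "datetime": [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 5)],
    }
).sort("datetime")

out = df.rolling(index_column="datetime", by="group", period=timedelta(days=3)).agg(
    pl.len().alias("count")  # new expression, old column name preserved
)
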