Update all references
stinodego committed Jan 14, 2024
1 parent e0b33a4 commit 4ce7a39
Showing 37 changed files with 195 additions and 151 deletions.
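
For context, the pattern applied throughout this commit is the rename of the count expression to `pl.len()`, with `pl.count()` and `GroupBy.count()` deprecated in 0.20.5. A minimal before/after sketch, using a toy frame that is not part of the diff:

import polars as pl

df = pl.DataFrame({"x": ["b", "a", "b"], "y": [1, 2, 3]})

# New spelling: pl.len() produces a UInt32 column named "len"
df.group_by("x", maintain_order=True).agg(pl.len())

# Old spelling, deprecated in 0.20.5 in favor of pl.len()
df.group_by("x", maintain_order=True).agg(pl.count())
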
2 changes: 1 addition & 1 deletion docs/src/python/user-guide/basics/expressions.py
@@ -63,7 +63,7 @@
# --8<-- [end:dataframe2]

# --8<-- [start:group_by]
df2.group_by("y", maintain_order=True).count()
df2.group_by("y", maintain_order=True).len()
# --8<-- [end:group_by]

# --8<-- [start:group_by2]
4 changes: 2 additions & 2 deletions docs/src/python/user-guide/expressions/aggregation.py
@@ -24,11 +24,11 @@
dataset.lazy()
.group_by("first_name")
.agg(
pl.count(),
pl.len(),
pl.col("gender"),
pl.first("last_name"),
)
.sort("count", descending=True)
.sort("len", descending=True)
.limit(5)
)

@@ -43,7 +43,7 @@ def add_counter(val: int) -> int:

out = df.select(
pl.col("values").map_elements(add_counter).alias("solution_map_elements"),
(pl.col("values") + pl.int_range(1, pl.count() + 1)).alias("solution_expr"),
(pl.col("values") + pl.int_range(1, pl.len() + 1)).alias("solution_expr"),
)
print(out)
# --8<-- [end:counter]
5 changes: 3 additions & 2 deletions docs/src/python/user-guide/io/multiple.py
@@ -28,12 +28,13 @@
# --8<-- [end:graph]

# --8<-- [start:glob]
import polars as pl
import glob

import polars as pl

queries = []
for file in glob.glob("docs/data/my_many_files_*.csv"):
q = pl.scan_csv(file).group_by("bar").agg([pl.count(), pl.sum("foo")])
q = pl.scan_csv(file).group_by("bar").agg(pl.len(), pl.sum("foo"))
queries.append(q)

dataframes = pl.collect_all(queries)
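
Besides the `count` → `len` rename, this hunk also drops the list wrapper around the aggregations: `agg` accepts expressions positionally, and the older list form remains valid. A minimal sketch with a toy LazyFrame (not part of the diff):

import polars as pl

lf = pl.LazyFrame({"bar": ["a", "a", "b"], "foo": [1, 2, 3]})

# Positional expressions ...
lf.group_by("bar").agg(pl.len(), pl.sum("foo"))

# ... are equivalent to the list form that the diff removes
lf.group_by("bar").agg([pl.len(), pl.sum("foo")])
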
@@ -1,7 +1,8 @@
# --8<-- [start:setup]
import polars as pl
from datetime import date, datetime

import polars as pl

# --8<-- [end:setup]

# --8<-- [start:df]
@@ -60,10 +61,6 @@
closed="both",
by="groups",
include_boundaries=True,
).agg(
[
pl.count(),
]
)
).agg(pl.len())
print(out)
# --8<-- [end:group_by_dyn2]
16 changes: 11 additions & 5 deletions py-polars/polars/dataframe/frame.py
@@ -5272,10 +5272,10 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
└──────┴─────┴─────┘
An index column can also be created using the expressions :func:`int_range`
and :func:`count`.
and :func:`len`.
>>> df.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... )
shape: (3, 3)
@@ -7260,9 +7260,8 @@ def pivot(
- None: no aggregation takes place, will raise error if multiple values are in group.
- A predefined aggregate function string, one of
{'first', 'sum', 'max', 'min', 'mean', 'median', 'last', 'count'}
{'min', 'max', 'first', 'last', 'sum', 'mean', 'median', 'len'}
- An expression to do the aggregation.
maintain_order
Sort the grouped keys so that the output order is predictable.
sort_columns
@@ -7392,8 +7391,15 @@ def pivot(
aggregate_expr = F.element().median()._pyexpr
elif aggregate_function == "last":
aggregate_expr = F.element().last()._pyexpr
elif aggregate_function == "len":
aggregate_expr = F.len()._pyexpr
elif aggregate_function == "count":
aggregate_expr = F.count()._pyexpr
issue_deprecation_warning(
"`aggregate_function='count'` input for `pivot` is deprecated."
" Please use `aggregate_function='len'`.",
version="0.20.5",
)
aggregate_expr = F.len()._pyexpr
else:
msg = f"invalid input for `aggregate_function` argument: {aggregate_function!r}"
raise ValueError(msg)
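
A sketch of how the two `aggregate_function` branches above behave from the caller's side, assuming the 0.20-era `pivot` signature with `values`/`index`/`columns` parameters and toy column names:

import polars as pl

df = pl.DataFrame(
    {
        "foo": ["one", "one", "two", "two"],
        "bar": ["A", "B", "A", "A"],
        "baz": [1, 2, 3, 4],
    }
)

# New string input: each cell holds the group size
df.pivot(values="baz", index="foo", columns="bar", aggregate_function="len")

# Deprecated since 0.20.5: still dispatches to F.len() but emits a warning
df.pivot(values="baz", index="foo", columns="bar", aggregate_function="count")
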
30 changes: 28 additions & 2 deletions py-polars/polars/dataframe/group_by.py
@@ -305,7 +305,7 @@ def map_groups(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame:
It is better to implement this with an expression:
>>> df.filter(
... pl.int_range(0, pl.count()).shuffle().over("color") < 2
... pl.int_range(pl.len()).shuffle().over("color") < 2
... ) # doctest: +IGNORE_RESULT
"""
by: list[str]
@@ -452,6 +452,32 @@ def all(self) -> DataFrame:
"""
return self.agg(F.all())

def len(self) -> DataFrame:
"""
Return the number of rows in each group.
Examples
--------
>>> df = pl.DataFrame(
... {
... "a": ["apple", "apple", "orange"],
... "b": [1, None, 2],
... }
... )
>>> df.group_by("a").len() # doctest: +SKIP
shape: (2, 2)
┌────────┬─────┐
│ a ┆ len │
│ --- ┆ --- │
│ str ┆ u32 │
╞════════╪═════╡
│ apple ┆ 2 │
│ orange ┆ 1 │
└────────┴─────┘
"""
return self.agg(F.len())

@deprecate_renamed_function("len", version="0.20.5")
def count(self) -> DataFrame:
"""
Return the number of rows in each group.
@@ -477,7 +503,7 @@ def count(self) -> DataFrame:
│ orange ┆ 1 │
└────────┴───────┘
"""
return self.agg(F.count())
return self.agg(F.len().alias("count"))

def first(self) -> DataFrame:
"""
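
The deprecated `count` path above keeps the old output column name by aliasing `F.len()`, so existing callers only see a deprecation warning rather than a schema change. A minimal sketch reusing the docstring's toy frame:

import polars as pl

df = pl.DataFrame({"a": ["apple", "apple", "orange"], "b": [1, None, 2]})

# Deprecated spelling: warns, but the result still has a "count" column
old = df.group_by("a").count()

# New spelling: the column is named "len" instead
new = df.group_by("a").len()

print(old.columns)  # ['a', 'count']
print(new.columns)  # ['a', 'len']
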
8 changes: 4 additions & 4 deletions py-polars/polars/expr/meta.py
@@ -130,11 +130,11 @@ def output_name(self, *, raise_if_undetermined: bool = True) -> str | None:
>>> e_sum_over = pl.sum("foo").over("groups")
>>> e_sum_over.meta.output_name()
'foo'
>>> e_sum_slice = pl.sum("foo").slice(pl.count() - 10, pl.col("bar"))
>>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
>>> e_sum_slice.meta.output_name()
'foo'
>>> pl.count().meta.output_name()
'count'
>>> pl.len().meta.output_name()
'len'
"""
try:
return self._pyexpr.meta_output_name()
@@ -180,7 +180,7 @@ def root_names(self) -> list[str]:
>>> e_sum_over = pl.sum("foo").over("groups")
>>> e_sum_over.meta.root_names()
['foo', 'groups']
>>> e_sum_slice = pl.sum("foo").slice(pl.count() - 10, pl.col("bar"))
>>> e_sum_slice = pl.sum("foo").slice(pl.len() - 10, pl.col("bar"))
>>> e_sum_slice.meta.root_names()
['foo', 'bar']
"""
4 changes: 2 additions & 2 deletions py-polars/polars/functions/range/int_range.py
@@ -198,11 +198,11 @@ def int_range(
2
]
Generate an index column using `int_range` in conjunction with :func:`count`.
Generate an index column by using `int_range` in conjunction with :func:`len`.
>>> df = pl.DataFrame({"a": [1, 3, 5], "b": [2, 4, 6]})
>>> df.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... )
shape: (3, 3)
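
The same `int_range`/`len` combination also works per group via a window, a common alternative to `with_row_index` when the index should restart for each group. A minimal sketch with an illustrative frame that is not part of the diff:

import polars as pl

df = pl.DataFrame({"g": ["a", "a", "b"], "v": [10, 20, 30]})

# 0-based position of each row within its "g" group
df.with_columns(
    pl.int_range(pl.len(), dtype=pl.UInt32).over("g").alias("index_in_group")
)
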
4 changes: 2 additions & 2 deletions py-polars/polars/lazyframe/frame.py
@@ -4616,10 +4616,10 @@ def with_row_index(self, name: str = "index", offset: int = 0) -> Self:
└──────┴─────┴─────┘
An index column can also be created using the expressions :func:`int_range`
and :func:`count`.
and :func:`len`.
>>> lf.select(
... pl.int_range(pl.count(), dtype=pl.UInt32).alias("index"),
... pl.int_range(pl.len(), dtype=pl.UInt32).alias("index"),
... pl.all(),
... ).collect()
shape: (3, 3)
38 changes: 32 additions & 6 deletions py-polars/polars/lazyframe/group_by.py
@@ -208,11 +208,9 @@ def map_groups(
It is better to implement this with an expression:
>>> (
... df.lazy()
... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2)
... .collect()
... ) # doctest: +IGNORE_RESULT
>>> df.lazy().filter(
... pl.int_range(pl.len()).shuffle().over("color") < 2
... ).collect() # doctest: +IGNORE_RESULT
"""
return wrap_ldf(self.lgb.map_groups(function, schema))

@@ -335,6 +333,34 @@ def all(self) -> LazyFrame:
"""
return self.agg(F.all())

def len(self) -> LazyFrame:
"""
Return the number of rows in each group.
Rows containing null values count towards the total.
Examples
--------
>>> lf = pl.LazyFrame(
... {
... "a": ["apple", "apple", "orange"],
... "b": [1, None, 2],
... }
... )
>>> lf.group_by("a").len().collect() # doctest: +SKIP
shape: (2, 2)
┌────────┬─────┐
│ a ┆ len │
│ --- ┆ --- │
│ str ┆ u32 │
╞════════╪═════╡
│ apple ┆ 2 │
│ orange ┆ 1 │
└────────┴─────┘
"""
return self.agg(F.len())

@deprecate_renamed_function("len", version="0.20.5")
def count(self) -> LazyFrame:
"""
Return the number of rows in each group.
@@ -360,7 +386,7 @@ def count(self) -> LazyFrame:
│ orange ┆ 1 │
└────────┴───────┘
"""
return self.agg(F.count())
return self.agg(F.len().alias("count"))

def first(self) -> LazyFrame:
"""
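
As with the eager API, the new `LazyGroupBy.len` only extends the query plan; nothing is computed until `collect`. A minimal sketch using the docstring's toy data:

import polars as pl

lf = pl.LazyFrame({"a": ["apple", "apple", "orange"], "b": [1, None, 2]})

plan = lf.group_by("a").len()  # lazy: only builds the plan
result = plan.collect()        # execution happens here
print(result.columns)  # ['a', 'len']
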
2 changes: 1 addition & 1 deletion py-polars/polars/type_aliases.py
@@ -100,7 +100,7 @@
"lz4", "uncompressed", "snappy", "gzip", "lzo", "brotli", "zstd"
]
PivotAgg: TypeAlias = Literal[
"first", "sum", "max", "min", "mean", "median", "last", "count"
"min", "max", "first", "last", "sum", "mean", "median", "len"
]
RankMethod: TypeAlias = Literal["average", "min", "max", "dense", "ordinal", "random"]
SizeUnit: TypeAlias = Literal[
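
The `PivotAgg` literal is what static type checkers see for the `aggregate_function` string argument, so after this change "len" is accepted while the removed "count" literal is flagged. A sketch of a typed helper, assuming the 0.20-era module path `polars.type_aliases` (semi-internal) and hypothetical column names:

import polars as pl
from polars.type_aliases import PivotAgg  # semi-internal typing module as of 0.20.x


def pivot_summary(df: pl.DataFrame, agg: PivotAgg = "len") -> pl.DataFrame:
    # "len" type-checks; passing "count" would be rejected by a checker.
    return df.pivot(values="baz", index="foo", columns="bar", aggregate_function=agg)
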
6 changes: 3 additions & 3 deletions py-polars/tests/unit/dataframe/test_df.py
@@ -1774,9 +1774,9 @@ def __repr__(self) -> str:
def test_group_by_order_dispatch() -> None:
df = pl.DataFrame({"x": list("bab"), "y": range(3)})

result = df.group_by("x", maintain_order=True).count()
result = df.group_by("x", maintain_order=True).len()
expected = pl.DataFrame(
{"x": ["b", "a"], "count": [2, 1]}, schema_overrides={"count": pl.UInt32}
{"x": ["b", "a"], "len": [2, 1]}, schema_overrides={"len": pl.UInt32}
)
assert_frame_equal(result, expected)

@@ -2409,7 +2409,7 @@ def test_group_by_slice_expression_args() -> None:

out = (
df.group_by("groups", maintain_order=True)
.agg([pl.col("vals").slice(pl.count() * 0.1, (pl.count() // 5))])
.agg([pl.col("vals").slice(pl.len() * 0.1, (pl.len() // 5))])
.explode("vals")
)

2 changes: 1 addition & 1 deletion py-polars/tests/unit/datatypes/test_categorical.py
@@ -124,7 +124,7 @@ def test_unset_sorted_on_append() -> None:
]
).sort("key")
df = pl.concat([df1, df2], rechunk=False)
assert df.group_by("key").count()["count"].to_list() == [4, 4]
assert df.group_by("key").len()["len"].to_list() == [4, 4]


@pytest.mark.parametrize(
36 changes: 12 additions & 24 deletions py-polars/tests/unit/datatypes/test_temporal.py
@@ -1310,13 +1310,13 @@ def test_rolling_by_() -> None:
out = (
df.sort("datetime")
.rolling(index_column="datetime", by="group", period=timedelta(days=3))
.agg([pl.count().alias("count")])
.agg([pl.len().alias("count")])
)

expected = (
df.sort(["group", "datetime"])
.rolling(index_column="datetime", by="group", period="3d")
.agg([pl.count().alias("count")])
.agg([pl.len().alias("count")])
)
assert_frame_equal(out.sort(["group", "datetime"]), expected)
assert out.to_dict(as_series=False) == {
@@ -2573,30 +2573,18 @@ def test_datetime_cum_agg_schema() -> None:


def test_rolling_group_by_empty_groups_by_take_6330() -> None:
df = (
pl.DataFrame({"Event": ["Rain", "Sun"]})
.join(
pl.DataFrame(
{
"Date": [1, 2, 3, 4],
}
),
how="cross",
)
.set_sorted("Date")
)
assert (
df.rolling(
index_column="Date",
period="2i",
offset="-2i",
by="Event",
closed="left",
).agg([pl.count()])
).to_dict(as_series=False) == {
df1 = pl.DataFrame({"Event": ["Rain", "Sun"]})
df2 = pl.DataFrame({"Date": [1, 2, 3, 4]})
df = df1.join(df2, how="cross").set_sorted("Date")

result = df.rolling(
index_column="Date", period="2i", offset="-2i", by="Event", closed="left"
).agg(pl.len())

assert result.to_dict(as_series=False) == {
"Event": ["Rain", "Rain", "Rain", "Rain", "Sun", "Sun", "Sun", "Sun"],
"Date": [1, 2, 3, 4, 1, 2, 3, 4],
"count": [0, 1, 2, 2, 0, 1, 2, 2],
"len": [0, 1, 2, 2, 0, 1, 2, 2],
}


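
Where downstream code still expects the old column name, the tests above keep it by aliasing the new expression (`pl.len().alias("count")`). A minimal sketch with a toy rolling group-by that is not taken from the test file:

from datetime import datetime, timedelta

import polars as pl

df = pl.DataFrame(
    {
        "group": ["a", "a", "b"],
        "datetime": [datetime(2021, 1, 1), datetime(2021, 1, 2), datetime(2021, 1, 5)],
    }
).sort("datetime")

out = df.rolling(index_column="datetime", by="group", period=timedelta(days=3)).agg(
    pl.len().alias("count")  # new expression, old column name preserved
)
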