Skip to content

Commit

Permalink
docs(python): Docstrings for frame, lazyframe and time series (#5398)
Browse files Browse the repository at this point in the history
Co-authored-by: Liam Brannigan <l.brannigan@analyticsengines.com>
  • Loading branch information
braaannigan and Liam Brannigan authored Nov 2, 2022
1 parent ad678ca commit 982c10e
Show file tree
Hide file tree
Showing 4 changed files with 520 additions and 0 deletions.
211 changes: 211 additions & 0 deletions py-polars/polars/internals/dataframe/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3881,6 +3881,42 @@ def join(
│ 3 ┆ 8.0 ┆ c ┆ null │
└──────┴──────┴─────┴───────┘
>>> df.join(other_df, on="ham", how="left")
shape: (3, 4)
┌─────┬─────┬─────┬───────┐
│ foo ┆ bar ┆ ham ┆ apple │
│ --- ┆ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str ┆ str │
╞═════╪═════╪═════╪═══════╡
│ 1 ┆ 6.0 ┆ a ┆ x │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ 2 ┆ 7.0 ┆ b ┆ y │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌╌╌┤
│ 3 ┆ 8.0 ┆ c ┆ null │
└─────┴─────┴─────┴───────┘
>>> df.join(other_df, on="ham", how="semi")
shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 1 ┆ 6.0 ┆ a │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ 2 ┆ 7.0 ┆ b │
└─────┴─────┴─────┘
>>> df.join(other_df, on="ham", how="anti")
shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ i64 ┆ f64 ┆ str │
╞═════╪═════╪═════╡
│ 3 ┆ 8.0 ┆ c │
└─────┴─────┴─────┘
Notes
-----
For joining on columns with categorical data, see ``pl.StringCache()``.
Expand Down Expand Up @@ -4467,6 +4503,53 @@ def fill_null(
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 4 ┆ 13.0 │
└─────┴──────┘
>>> df.fill_null(strategy="forward")
shape: (4, 2)
┌─────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪══════╡
│ 1 ┆ 0.5 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ 4.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ 4.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 4 ┆ 13.0 │
└─────┴──────┘
>>> df.fill_null(strategy="max")
shape: (4, 2)
┌─────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪══════╡
│ 1 ┆ 0.5 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ 4.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 4 ┆ 13.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 4 ┆ 13.0 │
└─────┴──────┘
>>> df.fill_null(strategy="zero")
shape: (4, 2)
┌─────┬──────┐
│ a ┆ b │
│ --- ┆ --- │
│ i64 ┆ f64 │
╞═════╪══════╡
│ 1 ┆ 0.5 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 2 ┆ 4.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 0 ┆ 0.0 │
├╌╌╌╌╌┼╌╌╌╌╌╌┤
│ 4 ┆ 13.0 │
└─────┴──────┘
"""
return self._from_pydf(
Expand Down Expand Up @@ -4983,6 +5066,33 @@ def partition_by(
╞═════╪═════╪═════╡
│ C ┆ 2 ┆ l │
└─────┴─────┴─────┘]
>>> df.partition_by(groups="foo", maintain_order=True, as_dict=True)
{'A': shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ N ┆ bar │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ A ┆ 1 ┆ k │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ A ┆ 2 ┆ l │
└─────┴─────┴─────┘, 'B': shape: (2, 3)
┌─────┬─────┬─────┐
│ foo ┆ N ┆ bar │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ B ┆ 2 ┆ m │
├╌╌╌╌╌┼╌╌╌╌╌┼╌╌╌╌╌┤
│ B ┆ 4 ┆ m │
└─────┴─────┴─────┘, 'C': shape: (1, 3)
┌─────┬─────┬─────┐
│ foo ┆ N ┆ bar │
│ --- ┆ --- ┆ --- │
│ str ┆ i64 ┆ str │
╞═════╪═════╪═════╡
│ C ┆ 2 ┆ l │
└─────┴─────┴─────┘}
"""
if isinstance(groups, str):
Expand Down Expand Up @@ -5218,6 +5328,62 @@ def select(
│ 3 │
└─────┘
>>> df.select(["foo", "bar"])
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 1 ┆ 6 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 2 ┆ 7 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 8 │
└─────┴─────┘
>>> df.select(pl.col("foo") + 1)
shape: (3, 1)
┌─────┐
│ foo │
│ --- │
│ i64 │
╞═════╡
│ 2 │
├╌╌╌╌╌┤
│ 3 │
├╌╌╌╌╌┤
│ 4 │
└─────┘
>>> df.select([pl.col("foo") + 1, pl.col("bar") + 1])
shape: (3, 2)
┌─────┬─────┐
│ foo ┆ bar │
│ --- ┆ --- │
│ i64 ┆ i64 │
╞═════╪═════╡
│ 2 ┆ 7 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 3 ┆ 8 │
├╌╌╌╌╌┼╌╌╌╌╌┤
│ 4 ┆ 9 │
└─────┴─────┘
>>> df.select(pl.when(pl.col("foo") > 2).then(10).otherwise(0))
shape: (3, 1)
┌─────────┐
│ literal │
│ --- │
│ i64 │
╞═════════╡
│ 0 │
├╌╌╌╌╌╌╌╌╌┤
│ 0 │
├╌╌╌╌╌╌╌╌╌┤
│ 10 │
└─────────┘
"""
return self._from_pydf(
self.lazy()
Expand Down Expand Up @@ -5338,6 +5504,8 @@ def n_chunks(self, strategy: str = "first") -> int | list[int]:
... )
>>> df.n_chunks()
1
>>> df.n_chunks(strategy="all")
[1, 1, 1]
"""
if strategy == "first":
Expand Down Expand Up @@ -5495,6 +5663,14 @@ def sum(
╞═════╪═════╪══════╡
│ 6 ┆ 21 ┆ null │
└─────┴─────┴──────┘
>>> df.sum(axis=1)
shape: (3,)
Series: 'foo' [str]
[
"16a"
"27b"
"38c"
]
"""
if axis == 0:
Expand Down Expand Up @@ -5564,6 +5740,23 @@ def mean(
│ 2.0 ┆ 7.0 ┆ null │
└─────┴─────┴──────┘
Note: a PanicException is raised with axis = 1 and a string column.
>>> df = pl.DataFrame(
... {
... "foo": [1, 2, 3],
... "bar": [6, 7, 8],
... }
... )
>>> df.mean(axis=1)
shape: (3,)
Series: 'foo' [f64]
[
3.5
4.5
5.5
]
Note: the mean of booleans evaluates to null.
>>> df = pl.DataFrame(
Expand Down Expand Up @@ -5628,6 +5821,15 @@ def std(self: DF, ddof: int = 1) -> DF:
╞═════╪═════╪══════╡
│ 1.0 ┆ 1.0 ┆ null │
└─────┴─────┴──────┘
>>> df.std(ddof=0)
shape: (1, 3)
┌──────────┬──────────┬──────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ f64 ┆ f64 ┆ str │
╞══════════╪══════════╪══════╡
│ 0.816497 ┆ 0.816497 ┆ null │
└──────────┴──────────┴──────┘
"""
return self._from_pydf(self._df.std(ddof))
Expand Down Expand Up @@ -5659,6 +5861,15 @@ def var(self: DF, ddof: int = 1) -> DF:
╞═════╪═════╪══════╡
│ 1.0 ┆ 1.0 ┆ null │
└─────┴─────┴──────┘
>>> df.var(ddof=0)
shape: (1, 3)
┌──────────┬──────────┬──────┐
│ foo ┆ bar ┆ ham │
│ --- ┆ --- ┆ --- │
│ f64 ┆ f64 ┆ str │
╞══════════╪══════════╪══════╡
│ 0.666667 ┆ 0.666667 ┆ null │
└──────────┴──────────┴──────┘
"""
return self._from_pydf(self._df.var(ddof))
Expand Down
57 changes: 57 additions & 0 deletions py-polars/polars/internals/expr/datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ def truncate(
"""
Divide the date/datetime range into buckets.
Each date/datetime is mapped to the start of its bucket.
Parameters
----------
every
Expand Down Expand Up @@ -111,6 +113,31 @@ def truncate(
... )
True
>>> start = datetime(2001, 1, 1)
>>> stop = datetime(2001, 1, 1, 1)
>>> df = pl.date_range(start, stop, "10m", name="dates").to_frame()
>>> df.select(["dates", pl.col("dates").dt.truncate("30m").alias("truncate")])
shape: (7, 2)
┌─────────────────────┬─────────────────────┐
│ dates ┆ truncate │
│ --- ┆ --- │
│ datetime[μs] ┆ datetime[μs] │
╞═════════════════════╪═════════════════════╡
│ 2001-01-01 00:00:00 ┆ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:10:00 ┆ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:20:00 ┆ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:30:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:40:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:50:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
└─────────────────────┴─────────────────────┘
"""
if offset is None:
offset = "0ns"
Expand All @@ -128,6 +155,11 @@ def round(
"""
Divide the date/datetime range into buckets.
Each date/datetime in the first half of the interval
is mapped to the start of its bucket.
Each date/datetime in the second half of the interval
is mapped to the end of its bucket.
Parameters
----------
every
Expand Down Expand Up @@ -217,6 +249,31 @@ def round(
... )
True
>>> start = datetime(2001, 1, 1)
>>> stop = datetime(2001, 1, 1, 1)
>>> df = pl.date_range(start, stop, "10m", name="dates").to_frame()
>>> df.select(["dates", pl.col("dates").dt.round("30m").alias("round")])
shape: (7, 2)
┌─────────────────────┬─────────────────────┐
│ dates ┆ round │
│ --- ┆ --- │
│ datetime[μs] ┆ datetime[μs] │
╞═════════════════════╪═════════════════════╡
│ 2001-01-01 00:00:00 ┆ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:10:00 ┆ 2001-01-01 00:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:20:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:30:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:40:00 ┆ 2001-01-01 00:30:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 00:50:00 ┆ 2001-01-01 01:00:00 │
├╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┼╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌╌┤
│ 2001-01-01 01:00:00 ┆ 2001-01-01 01:00:00 │
└─────────────────────┴─────────────────────┘
"""
if offset is None:
offset = "0ns"
Expand Down
Loading

0 comments on commit 982c10e

Please sign in to comment.