From 9c0760276b972a75572542af9661fd564d243961 Mon Sep 17 00:00:00 2001 From: Robin Date: Tue, 16 Jan 2024 11:37:23 +0100 Subject: [PATCH] docs: Improve streaming section of the user guide (#13750) --- docs/_build/API_REFERENCE_LINKS.yml | 2 ++ .../python/user-guide/concepts/streaming.py | 20 +++++++++++++--- .../src/rust/user-guide/concepts/streaming.rs | 21 ++++++++++++++-- docs/user-guide/concepts/streaming.md | 24 +++++++++++++++++++ 4 files changed, 62 insertions(+), 5 deletions(-) diff --git a/docs/_build/API_REFERENCE_LINKS.yml b/docs/_build/API_REFERENCE_LINKS.yml index 9b2b02058a17..b76a38fec350 100644 --- a/docs/_build/API_REFERENCE_LINKS.yml +++ b/docs/_build/API_REFERENCE_LINKS.yml @@ -169,6 +169,8 @@ rust: concat: https://docs.pola.rs/docs/rust/dev/polars_lazy/dsl/functions/fn.concat.html SQLContext: https://docs.pola.rs/py-polars/html/reference/sql.html + explain: https://docs.rs/polars/latest/polars/prelude/struct.LazyFrame.html#method.explain + operators: https://docs.pola.rs/docs/rust/dev/polars_lazy/dsl/enum.Operator.html Array: https://docs.pola.rs/docs/rust/dev/polars/datatypes/enum.DataType.html#variant.Array diff --git a/docs/src/python/user-guide/concepts/streaming.py b/docs/src/python/user-guide/concepts/streaming.py index 955750bf6c30..a54f545c0979 100644 --- a/docs/src/python/user-guide/concepts/streaming.py +++ b/docs/src/python/user-guide/concepts/streaming.py @@ -1,12 +1,26 @@ +# --8<-- [start:import] import polars as pl +# --8<-- [end:import] # --8<-- [start:streaming] -q = ( +q1 = ( pl.scan_csv("docs/data/iris.csv") .filter(pl.col("sepal_length") > 5) .group_by("species") .agg(pl.col("sepal_width").mean()) ) - -df = q.collect(streaming=True) +df = q1.collect(streaming=True) # --8<-- [end:streaming] + +# --8<-- [start:example] +print(q1.explain(streaming=True)) + +# --8<-- [end:example] + +# --8<-- [start:example2] +q2 = pl.scan_csv("docs/data/iris.csv").with_columns( + pl.col("sepal_length").mean().over("species") +) + 
+print(q2.explain(streaming=True)) +# --8<-- [end:example2] diff --git a/docs/src/rust/user-guide/concepts/streaming.rs b/docs/src/rust/user-guide/concepts/streaming.rs index ae4efc27474a..700458fb635b 100644 --- a/docs/src/rust/user-guide/concepts/streaming.rs +++ b/docs/src/rust/user-guide/concepts/streaming.rs @@ -2,16 +2,33 @@ use polars::prelude::*; fn main() -> Result<(), Box<dyn std::error::Error>> { // --8<-- [start:streaming] - let q = LazyCsvReader::new("docs/data/iris.csv") + let q1 = LazyCsvReader::new("docs/data/iris.csv") .has_header(true) .finish()? .filter(col("sepal_length").gt(lit(5))) .group_by(vec![col("species")]) .agg([col("sepal_width").mean()]); - let df = q.with_streaming(true).collect()?; + let df = q1.clone().with_streaming(true).collect()?; println!("{}", df); // --8<-- [end:streaming] + // --8<-- [start:example] + let query_plan = q1.with_streaming(true).explain(true)?; + println!("{}", query_plan); + // --8<-- [end:example] + + // --8<-- [start:example2] + let q2 = LazyCsvReader::new("docs/data/iris.csv") + .finish()? + .with_columns(vec![col("sepal_length") + .mean() + .over(vec![col("species")]) + .alias("sepal_length_mean")]); + + let query_plan = q2.with_streaming(true).explain(true)?; + println!("{}", query_plan); + // --8<-- [end:example2] + Ok(()) } diff --git a/docs/user-guide/concepts/streaming.md b/docs/user-guide/concepts/streaming.md index 0e0f4dad2327..0365e944f47e 100644 --- a/docs/user-guide/concepts/streaming.md +++ b/docs/user-guide/concepts/streaming.md @@ -16,6 +16,30 @@ Streaming is supported for many operations including: - `with_columns`,`select` - `group_by` - `join` +- `unique` - `sort` - `explode`,`melt` - `scan_csv`,`scan_parquet`,`scan_ipc` + +This list is not exhaustive. Polars is in active development, and more operations can be added without explicit notice. + +### Example with supported operations + +To determine which parts of your query are streaming, use the `explain` method. 
Below is an example that demonstrates how to inspect the query plan. More information about the query plan can be found in the chapter on the [Lazy API](https://docs.pola.rs/user-guide/lazy/query-plan/). + +{{code_block('user-guide/concepts/streaming', 'example',['explain'])}} + +```python exec="on" result="text" session="user-guide/streaming" +--8<-- "python/user-guide/concepts/streaming.py:import" +--8<-- "python/user-guide/concepts/streaming.py:streaming" +--8<-- "python/user-guide/concepts/streaming.py:example" +``` + +### Example with non-streaming operations + +{{code_block('user-guide/concepts/streaming', 'example2',['explain'])}} + +```python exec="on" result="text" session="user-guide/streaming" +--8<-- "python/user-guide/concepts/streaming.py:import" +--8<-- "python/user-guide/concepts/streaming.py:example2" +```