From 559b9d683bce3d4fd48d7946359fbdaeea1afccc Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 27 Nov 2023 09:38:11 -0500 Subject: [PATCH] testing ruff's new docstring code snippet formatter --- py-polars/polars/api.py | 13 +- py-polars/polars/config.py | 27 +--- py-polars/polars/dataframe/frame.py | 49 ++---- py-polars/polars/dataframe/group_by.py | 1 - py-polars/polars/datatypes/classes.py | 4 +- py-polars/polars/expr/binary.py | 4 +- py-polars/polars/expr/datetime.py | 70 ++------ py-polars/polars/expr/expr.py | 151 +++++------------- py-polars/polars/expr/list.py | 24 +-- py-polars/polars/expr/name.py | 4 +- py-polars/polars/expr/string.py | 40 ++--- py-polars/polars/expr/struct.py | 4 +- py-polars/polars/functions/as_datatype.py | 18 +-- py-polars/polars/functions/eager.py | 4 +- py-polars/polars/functions/lazy.py | 26 +-- py-polars/polars/io/csv/functions.py | 10 +- py-polars/polars/io/delta.py | 16 +- py-polars/polars/io/iceberg.py | 16 +- py-polars/polars/lazyframe/frame.py | 62 ++----- py-polars/polars/lazyframe/group_by.py | 16 +- py-polars/polars/selectors.py | 1 - py-polars/polars/series/array.py | 8 +- py-polars/polars/series/categorical.py | 2 - py-polars/polars/series/datetime.py | 64 ++------ py-polars/polars/series/list.py | 4 +- py-polars/polars/series/series.py | 4 +- py-polars/polars/series/string.py | 10 +- py-polars/polars/sql/context.py | 5 +- py-polars/polars/string_cache.py | 2 - .../polars/testing/parametric/primitives.py | 1 - .../polars/testing/parametric/strategies.py | 1 - 31 files changed, 162 insertions(+), 499 deletions(-) diff --git a/py-polars/polars/api.py b/py-polars/polars/api.py index 7c8d368bed7f..2a3871f86648 100644 --- a/py-polars/polars/api.py +++ b/py-polars/polars/api.py @@ -99,7 +99,6 @@ def register_expr_namespace(name: str) -> Callable[[type[NS]], type[NS]]: ... ... def nearest(self, p: int) -> pl.Expr: ... return (p ** (self._expr.log(p)).round(0).cast(pl.Int64)).cast(pl.Int64) - ... >>> >>> df = pl.DataFrame([1.4, 24.3, 55.0, 64.001], schema=["n"]) >>> df.select( @@ -155,11 +154,8 @@ def register_dataframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]: ... def by_first_letter_of_column_values(self, col: str) -> list[pl.DataFrame]: ... return [ ... self._df.filter(pl.col(col).str.starts_with(c)) - ... for c in sorted( - ... set(df.select(pl.col(col).str.slice(0, 1)).to_series()) - ... ) + ... for c in sorted(set(df.select(pl.col(col).str.slice(0, 1)).to_series())) ... ] - ... >>> >>> df = pl.DataFrame( ... data=[["xx", 2, 3, 4], ["xy", 4, 5, 6], ["yy", 5, 6, 7], ["yz", 6, 7, 8]], @@ -247,16 +243,12 @@ def register_lazyframe_namespace(name: str) -> Callable[[type[NS]], type[NS]]: ... self._ldf = ldf ... ... def split_by_column_dtypes(self) -> list[pl.LazyFrame]: - ... return [ - ... self._ldf.select(pl.col(tp)) - ... for tp in dict.fromkeys(self._ldf.dtypes) - ... ] + ... return [self._ldf.select(pl.col(tp)) for tp in dict.fromkeys(self._ldf.dtypes)] ... ... def upcast_integer_types(self) -> pl.LazyFrame: ... return self._ldf.with_columns( ... pl.col(tp).cast(pl.Int64) for tp in (pl.Int8, pl.Int16, pl.Int32) ... ) - ... >>> >>> ldf = pl.DataFrame( ... data={"a": [1, 2], "b": [3, 4], "c": [5.6, 6.7]}, @@ -356,7 +348,6 @@ def register_series_namespace(name: str) -> Callable[[type[NS]], type[NS]]: ... ... def cube(self) -> pl.Series: ... return self._s * self._s * self._s - ... >>> >>> s = pl.Series("n", [1.5, 31.0, 42.0, 64.5]) >>> s.math.square().alias("s^2") diff --git a/py-polars/polars/config.py b/py-polars/polars/config.py index 5e9fffde0390..8f52e28dc969 100644 --- a/py-polars/polars/config.py +++ b/py-polars/polars/config.py @@ -94,14 +94,12 @@ class Config(contextlib.ContextDecorator): >>> with pl.Config() as cfg: ... # set verbose for more detailed output within the scope ... cfg.set_verbose(True) # doctest: +IGNORE_RESULT - ... >>> # scope exit - no longer in verbose mode This can also be written more compactly as: >>> with pl.Config(verbose=True): ... pass - ... (The compact format is available for all `Config` methods that take a single value). @@ -111,7 +109,6 @@ class Config(contextlib.ContextDecorator): >>> @pl.Config(verbose=True) ... def test(): ... pass - ... """ @@ -414,7 +411,6 @@ def set_auto_structify(cls, active: bool | None = False) -> type[Config]: >>> df = pl.DataFrame({"v": [1, 2, 3], "v2": [4, 5, 6]}) >>> with pl.Config(set_auto_structify=True): ... out = df.select(pl.all()) - ... >>> out shape: (3, 1) ┌───────────┐ @@ -577,7 +573,6 @@ def set_float_precision(cls, precision: int | None = None) -> type[Config]: >>> df = pl.DataFrame({"const": ["pi", "e"], "value": [pi, e]}) >>> with pl.Config(float_precision=15): ... print(df) - ... shape: (2, 2) ┌───────┬───────────────────┐ │ const ┆ value │ @@ -639,7 +634,6 @@ def set_fmt_float(cls, fmt: FloatFmt | None = "mixed") -> type[Config]: >>> s = pl.Series([1.2304980958725870923, 1e6, 1e-8]) >>> with pl.Config(set_fmt_float="mixed"): ... print(s) - ... shape: (3,) Series: '' [f64] [ @@ -652,7 +646,6 @@ def set_fmt_float(cls, fmt: FloatFmt | None = "mixed") -> type[Config]: >>> with pl.Config(set_fmt_float="full"): ... print(s) - ... shape: (3,) Series: '' [f64] [ @@ -697,7 +690,6 @@ def set_fmt_str_lengths(cls, n: int | None) -> type[Config]: └───────────────────────────────────┴─────┘ >>> with pl.Config(fmt_str_lengths=50): ... print(df) - ... shape: (2, 1) ┌──────────────────────────────────────────────────┐ │ txt │ @@ -752,7 +744,6 @@ def set_fmt_table_cell_list_len(cls, n: int | None) -> type[Config]: └─────────────┘ >>> with pl.Config(fmt_table_cell_list_len=10): ... print(df) - ... shape: (1, 1) ┌────────────────────┐ │ nums │ @@ -810,9 +801,7 @@ def set_tbl_cell_alignment( Examples -------- - >>> df = pl.DataFrame( - ... {"column_abc": [1.0, 2.5, 5.0], "column_xyz": [True, False, True]} - ... ) + >>> df = pl.DataFrame({"column_abc": [1.0, 2.5, 5.0], "column_xyz": [True, False, True]}) >>> pl.Config.set_tbl_cell_alignment("RIGHT") # doctest: +IGNORE_RESULT >>> print(df) shape: (3, 2) @@ -907,7 +896,6 @@ def set_tbl_cols(cls, n: int | None) -> type[Config]: ... cfg.set_tbl_cols(5) ... df = pl.DataFrame({str(i): [i] for i in range(100)}) ... print(df) - ... shape: (1, 100) ┌─────┬─────┬─────┬───┬─────┬─────┐ @@ -920,7 +908,6 @@ def set_tbl_cols(cls, n: int | None) -> type[Config]: >>> with pl.Config(tbl_cols=10): ... print(df) - ... shape: (1, 100) ┌─────┬─────┬─────┬─────┬─────┬───┬─────┬─────┬─────┬─────┬─────┐ │ 0 ┆ 1 ┆ 2 ┆ 3 ┆ 4 ┆ … ┆ 95 ┆ 96 ┆ 97 ┆ 98 ┆ 99 │ @@ -1032,9 +1019,7 @@ def set_tbl_formatting( Examples -------- - >>> df = pl.DataFrame( - ... {"abc": [-2.5, 5.0], "mno": ["hello", "world"], "xyz": [True, False]} - ... ) + >>> df = pl.DataFrame({"abc": [-2.5, 5.0], "mno": ["hello", "world"], "xyz": [True, False]}) >>> with pl.Config( ... tbl_formatting="ASCII_MARKDOWN", ... tbl_hide_column_data_types=True, @@ -1201,12 +1186,9 @@ def set_tbl_rows(cls, n: int | None) -> type[Config]: Examples -------- - >>> df = pl.DataFrame( - ... {"abc": [1.0, 2.5, 3.5, 5.0], "xyz": [True, False, True, False]} - ... ) + >>> df = pl.DataFrame({"abc": [1.0, 2.5, 3.5, 5.0], "xyz": [True, False, True, False]}) >>> with pl.Config(tbl_rows=2): ... print(df) - ... shape: (4, 2) ┌─────┬───────┐ │ abc ┆ xyz │ @@ -1261,7 +1243,6 @@ def set_trim_decimal_zeros(cls, active: bool | None = True) -> type[Config]: ... ) >>> with pl.Config(trim_decimal_zeros=False): ... print(df) - ... shape: (2, 1) ┌──────────────┐ │ d │ @@ -1273,7 +1254,6 @@ def set_trim_decimal_zeros(cls, active: bool | None = True) -> type[Config]: └──────────────┘ >>> with pl.Config(trim_decimal_zeros=True): ... print(df) - ... shape: (2, 1) ┌──────────────┐ │ d │ @@ -1298,7 +1278,6 @@ def set_verbose(cls, active: bool | None = True) -> type[Config]: >>> pl.Config.set_verbose(True) # doctest: +SKIP >>> with pl.Config(verbose=True): # doctest: +SKIP ... do_polars_operations() - ... """ if active is None: os.environ.pop("POLARS_VERBOSE", None) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index 3976e866c328..32040534f73f 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -333,7 +333,6 @@ class DataFrame: >>> class MyDataFrame(pl.DataFrame): ... pass - ... >>> isinstance(MyDataFrame().lazy().collect(), MyDataFrame) False @@ -1889,9 +1888,7 @@ def to_arrow(self) -> pa.Table: Examples -------- - >>> df = pl.DataFrame( - ... {"foo": [1, 2, 3, 4, 5, 6], "bar": ["a", "b", "c", "d", "e", "f"]} - ... ) + >>> df = pl.DataFrame({"foo": [1, 2, 3, 4, 5, 6], "bar": ["a", "b", "c", "d", "e", "f"]}) >>> df.to_arrow() pyarrow.Table foo: int64 @@ -2188,9 +2185,7 @@ def to_pandas( # noqa: D417 bar float64 ham object dtype: object - >>> pandas_df2_pa = df2.to_pandas( - ... use_pyarrow_extension_array=True - ... ) # doctest: +SKIP + >>> pandas_df2_pa = df2.to_pandas(use_pyarrow_extension_array=True) # doctest: +SKIP >>> pandas_df2_pa # doctest: +SKIP foo bar ham 0 1 6 @@ -2940,7 +2935,6 @@ def write_excel( ... ) ... ws.write(2, 1, "Basic/default conditional formatting", fmt_title) ... ws.write(len(df) + 6, 1, "Customised conditional formatting", fmt_title) - ... Export a table containing two different types of sparklines. Use default options for the "trend" sparkline and customised options (and positioning) @@ -3762,9 +3756,7 @@ def transpose( Include the header as a separate column - >>> df.transpose( - ... include_header=True, header_name="foo", column_names=["a", "b", "c"] - ... ) + >>> df.transpose(include_header=True, header_name="foo", column_names=["a", "b", "c"]) shape: (2, 4) ┌─────┬─────┬─────┬─────┐ │ foo ┆ a ┆ b ┆ c │ @@ -3783,7 +3775,6 @@ def transpose( ... while True: ... yield f"{base_name}{count}" ... count += 1 - ... >>> df.transpose(include_header=False, column_names=name_generator()) shape: (2, 3) ┌─────────────┬─────────────┬─────────────┐ @@ -3862,9 +3853,7 @@ def rename(self, mapping: dict[str, str]) -> DataFrame: Examples -------- - >>> df = pl.DataFrame( - ... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} - ... ) + >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}) >>> df.rename({"foo": "apple"}) shape: (3, 3) ┌───────┬─────┬─────┐ @@ -4250,9 +4239,7 @@ def get_column_index(self, name: str) -> int: Examples -------- - >>> df = pl.DataFrame( - ... {"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]} - ... ) + >>> df = pl.DataFrame({"foo": [1, 2, 3], "bar": [6, 7, 8], "ham": ["a", "b", "c"]}) >>> df.get_column_index("ham") 2 @@ -4954,7 +4941,6 @@ def pipe( -------- >>> def cast_str_to_int(data, col_name): ... return data.with_columns(pl.col(col_name).cast(pl.Int64)) - ... >>> df = pl.DataFrame({"a": [1, 2, 3, 4], "b": ["10", "20", "30", "40"]}) >>> df.pipe(cast_str_to_int, col_name="b") shape: (4, 2) @@ -5134,7 +5120,6 @@ def group_by( >>> for name, data in df.group_by("a"): # doctest: +SKIP ... print(name) ... print(data) - ... a shape: (2, 3) ┌─────┬─────┬─────┐ @@ -5515,9 +5500,9 @@ def group_by_dynamic( The window boundaries can also be added to the aggregation result - >>> df.group_by_dynamic( - ... "time", every="1h", include_boundaries=True, closed="right" - ... ).agg(pl.col("n").mean()) + >>> df.group_by_dynamic("time", every="1h", include_boundaries=True, closed="right").agg( + ... pl.col("n").mean() + ... ) shape: (4, 4) ┌─────────────────────┬─────────────────────┬─────────────────────┬─────┐ │ _lower_boundary ┆ _upper_boundary ┆ time ┆ n │ @@ -5724,9 +5709,9 @@ def upsample( ... "values": [0, 1, 2, 3], ... } ... ).set_sorted("time") - >>> df.upsample( - ... time_column="time", every="1mo", by="groups", maintain_order=True - ... ).select(pl.all().forward_fill()) + >>> df.upsample(time_column="time", every="1mo", by="groups", maintain_order=True).select( + ... pl.all().forward_fill() + ... ) shape: (7, 3) ┌─────────────────────┬────────┬────────┐ │ time ┆ groups ┆ values │ @@ -7779,7 +7764,6 @@ def select( ... df.select( ... is_odd=(pl.col(pl.INTEGER_DTYPES) % 2).name.suffix("_is_odd"), ... ) - ... shape: (3, 1) ┌───────────┐ │ is_odd │ @@ -7954,7 +7938,6 @@ def with_columns( ... df.drop("c").with_columns( ... diffs=pl.col(["a", "b"]).diff().name.suffix("_diff"), ... ) - ... shape: (4, 3) ┌─────┬──────┬─────────────┐ │ a ┆ b ┆ diffs │ @@ -9661,7 +9644,6 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]: ... ) >>> for idx, frame in enumerate(df.iter_slices()): ... print(f"{type(frame).__name__}:[{idx}]:{len(frame)}") - ... DataFrame:[0]:10000 DataFrame:[1]:7500 @@ -9671,7 +9653,6 @@ def iter_slices(self, n_rows: int = 10_000) -> Iterator[DataFrame]: >>> for frame in df.iter_slices(n_rows=15_000): ... record_batch = frame.to_arrow().to_batches()[0] ... print(f"{record_batch.schema}\n<< {len(record_batch)}") - ... a: int32 b: date32[day] c: large_string @@ -9968,9 +9949,7 @@ def merge_sorted(self, other: DataFrame, key: str) -> DataFrame: Examples -------- - >>> df0 = pl.DataFrame( - ... {"name": ["steve", "elise", "bob"], "age": [42, 44, 18]} - ... ).sort("age") + >>> df0 = pl.DataFrame({"name": ["steve", "elise", "bob"], "age": [42, 44, 18]}).sort("age") >>> df0 shape: (3, 2) ┌───────┬─────┐ @@ -10163,9 +10142,7 @@ def update( Update `df` values including null values in `new_df`, using an outer join strategy that defines explicit join columns in each frame: - >>> df.update( - ... new_df, left_on="A", right_on="C", how="outer", include_nulls=True - ... ) + >>> df.update(new_df, left_on="A", right_on="C", how="outer", include_nulls=True) shape: (5, 2) ┌─────┬──────┐ │ A ┆ B │ diff --git a/py-polars/polars/dataframe/group_by.py b/py-polars/polars/dataframe/group_by.py index 8e648b682093..faedfe8d581b 100644 --- a/py-polars/polars/dataframe/group_by.py +++ b/py-polars/polars/dataframe/group_by.py @@ -73,7 +73,6 @@ def __iter__(self) -> Self: >>> for name, data in df.group_by("foo"): # doctest: +SKIP ... print(name) ... print(data) - ... a shape: (2, 2) ┌─────┬─────┐ diff --git a/py-polars/polars/datatypes/classes.py b/py-polars/polars/datatypes/classes.py index bec9d0983300..66bb7233adbe 100644 --- a/py-polars/polars/datatypes/classes.py +++ b/py-polars/polars/datatypes/classes.py @@ -632,9 +632,7 @@ def __init__( # noqa: D417 Examples -------- - >>> s = pl.Series( - ... "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2) - ... ) + >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)) >>> s shape: (2,) Series: 'a' [array[i64, 2]] diff --git a/py-polars/polars/expr/binary.py b/py-polars/polars/expr/binary.py index b3fbcbdf850d..638e85b69f35 100644 --- a/py-polars/polars/expr/binary.py +++ b/py-polars/polars/expr/binary.py @@ -143,9 +143,7 @@ def starts_with(self, prefix: IntoExpr) -> Expr: >>> colors.select( ... "name", ... pl.col("code").bin.starts_with(b"\xff").alias("starts_with_lit"), - ... pl.col("code") - ... .bin.starts_with(pl.col("prefix")) - ... .alias("starts_with_expr"), + ... pl.col("code").bin.starts_with(pl.col("prefix")).alias("starts_with_expr"), ... ) shape: (3, 3) ┌────────┬─────────────────┬──────────────────┐ diff --git a/py-polars/polars/expr/datetime.py b/py-polars/polars/expr/datetime.py index 2cd2107cc386..4be642af4585 100644 --- a/py-polars/polars/expr/datetime.py +++ b/py-polars/polars/expr/datetime.py @@ -152,9 +152,7 @@ def truncate( >>> df = pl.datetime_range( ... datetime(2001, 1, 1), datetime(2001, 1, 1, 1), "10m", eager=True ... ).to_frame() - >>> df.select( - ... "datetime", pl.col("datetime").dt.truncate("30m").alias("truncate") - ... ) + >>> df.select("datetime", pl.col("datetime").dt.truncate("30m").alias("truncate")) shape: (7, 2) ┌─────────────────────┬─────────────────────┐ │ datetime ┆ truncate │ @@ -418,9 +416,7 @@ def to_string(self, format: str) -> Expr: ... } ... ) >>> df.with_columns( - ... pl.col("datetime") - ... .dt.to_string("%Y/%m/%d %H:%M:%S") - ... .alias("datetime_string") + ... pl.col("datetime").dt.to_string("%Y/%m/%d %H:%M:%S").alias("datetime_string") ... ) shape: (3, 2) ┌─────────────────────┬─────────────────────┐ @@ -469,9 +465,7 @@ def strftime(self, format: str) -> Expr: ... } ... ) >>> df.with_columns( - ... pl.col("datetime") - ... .dt.strftime("%Y/%m/%d %H:%M:%S") - ... .alias("datetime_string") + ... pl.col("datetime").dt.strftime("%Y/%m/%d %H:%M:%S").alias("datetime_string") ... ) shape: (3, 2) ┌─────────────────────┬─────────────────────┐ @@ -503,9 +497,7 @@ def year(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(1977, 1, 1), date(1978, 1, 1), date(1979, 1, 1)]} - ... ) + >>> df = pl.DataFrame({"date": [date(1977, 1, 1), date(1978, 1, 1), date(1979, 1, 1)]}) >>> df.select( ... "date", ... pl.col("date").dt.year().alias("calendar_year"), @@ -539,9 +531,7 @@ def is_leap_year(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(2000, 1, 1), date(2001, 1, 1), date(2002, 1, 1)]} - ... ) + >>> df = pl.DataFrame({"date": [date(2000, 1, 1), date(2001, 1, 1), date(2002, 1, 1)]}) >>> df.select(pl.col("date").dt.is_leap_year()) shape: (3, 1) ┌───────┐ @@ -574,9 +564,7 @@ def iso_year(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(1977, 1, 1), date(1978, 1, 1), date(1979, 1, 1)]} - ... ) + >>> df = pl.DataFrame({"date": [date(1977, 1, 1), date(1978, 1, 1), date(1979, 1, 1)]}) >>> df.select( ... "date", ... pl.col("date").dt.year().alias("calendar_year"), @@ -612,9 +600,7 @@ def quarter(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]} - ... ) + >>> df = pl.DataFrame({"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]}) >>> df.with_columns(pl.col("date").dt.quarter().alias("quarter")) shape: (3, 2) ┌────────────┬─────────┐ @@ -647,9 +633,7 @@ def month(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]} - ... ) + >>> df = pl.DataFrame({"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]}) >>> df.with_columns(pl.col("date").dt.month().alias("month")) shape: (3, 2) ┌────────────┬───────┐ @@ -682,9 +666,7 @@ def week(self) -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.DataFrame( - ... {"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]} - ... ) + >>> df = pl.DataFrame({"date": [date(2001, 1, 1), date(2001, 6, 30), date(2001, 12, 27)]}) >>> df.with_columns(pl.col("date").dt.week().alias("week")) shape: (3, 2) ┌────────────┬──────┐ @@ -722,11 +704,7 @@ def weekday(self) -> Expr: -------- >>> from datetime import timedelta, date >>> df = pl.DataFrame( - ... { - ... "date": pl.date_range( - ... date(2001, 12, 22), date(2001, 12, 25), eager=True - ... ) - ... } + ... {"date": pl.date_range(date(2001, 12, 22), date(2001, 12, 25), eager=True)} ... ) >>> df.with_columns( ... pl.col("date").dt.weekday().alias("weekday"), @@ -771,11 +749,7 @@ def day(self) -> Expr: -------- >>> from datetime import timedelta, date >>> df = pl.DataFrame( - ... { - ... "date": pl.date_range( - ... date(2001, 12, 22), date(2001, 12, 25), eager=True - ... ) - ... } + ... {"date": pl.date_range(date(2001, 12, 22), date(2001, 12, 25), eager=True)} ... ) >>> df.with_columns( ... pl.col("date").dt.weekday().alias("weekday"), @@ -820,11 +794,7 @@ def ordinal_day(self) -> Expr: -------- >>> from datetime import timedelta, date >>> df = pl.DataFrame( - ... { - ... "date": pl.date_range( - ... date(2001, 12, 22), date(2001, 12, 25), eager=True - ... ) - ... } + ... {"date": pl.date_range(date(2001, 12, 22), date(2001, 12, 25), eager=True)} ... ) >>> df.with_columns( ... pl.col("date").dt.weekday().alias("weekday"), @@ -1011,9 +981,7 @@ def second(self, *, fractional: bool = False) -> Expr: │ 2000-01-01 00:00:03.111110 ┆ 3 │ │ 2000-01-01 00:00:05.765431 ┆ 5 │ └────────────────────────────┴────────┘ - >>> df.with_columns( - ... pl.col("datetime").dt.second(fractional=True).alias("second") - ... ) + >>> df.with_columns(pl.col("datetime").dt.second(fractional=True).alias("second")) shape: (3, 2) ┌────────────────────────────┬──────────┐ │ datetime ┆ second │ @@ -1123,9 +1091,7 @@ def epoch(self, time_unit: EpochTimeUnit = "us") -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.date_range( - ... date(2001, 1, 1), date(2001, 1, 3), eager=True - ... ).to_frame() + >>> df = pl.date_range(date(2001, 1, 1), date(2001, 1, 3), eager=True).to_frame() >>> df.with_columns( ... pl.col("date").dt.epoch().alias("epoch_ns"), ... pl.col("date").dt.epoch(time_unit="s").alias("epoch_s"), @@ -1165,9 +1131,7 @@ def timestamp(self, time_unit: TimeUnit = "us") -> Expr: Examples -------- >>> from datetime import date - >>> df = pl.date_range( - ... date(2001, 1, 1), date(2001, 1, 3), eager=True - ... ).to_frame() + >>> df = pl.date_range(date(2001, 1, 1), date(2001, 1, 3), eager=True).to_frame() >>> df.with_columns( ... pl.col("date").dt.timestamp().alias("timestamp_ns"), ... pl.col("date").dt.timestamp("ms").alias("timestamp_ms"), @@ -1298,9 +1262,7 @@ def convert_time_zone(self, time_zone: str) -> Expr: >>> df.select( ... [ ... pl.col("date"), - ... pl.col("date") - ... .dt.convert_time_zone(time_zone="Europe/London") - ... .alias("London"), + ... pl.col("date").dt.convert_time_zone(time_zone="Europe/London").alias("London"), ... ] ... ) shape: (3, 2) diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index d5507cec680b..38e8e2aad96e 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -305,10 +305,7 @@ def to_physical(self) -> Self: >>> pl.DataFrame({"vals": ["a", "x", None, "a"]}).with_columns( ... [ ... pl.col("vals").cast(pl.Categorical), - ... pl.col("vals") - ... .cast(pl.Categorical) - ... .to_physical() - ... .alias("vals_physical"), + ... pl.col("vals").cast(pl.Categorical).to_physical().alias("vals_physical"), ... ] ... ) shape: (4, 2) @@ -653,9 +650,7 @@ def map_alias(self, function: Callable[[str], str]) -> Self: ... "B_reverse": ["z", "y", "x"], ... } ... ) - >>> df.with_columns( - ... pl.all().reverse().name.map(lambda c: c.rstrip("_reverse").lower()) - ... ) + >>> df.with_columns(pl.all().reverse().name.map(lambda c: c.rstrip("_reverse").lower())) shape: (3, 4) ┌───────────┬───────────┬─────┬─────┐ │ A_reverse ┆ B_reverse ┆ a ┆ b │ @@ -963,9 +958,7 @@ def pipe( >>> >>> df = pl.DataFrame({"val": ["a: 1", "b: 2", "c: 3", "d: 4"]}) >>> df.with_columns( - ... udfs=( - ... pl.col("val").pipe(extract_number).pipe(scale_negative_even, n=5) - ... ), + ... udfs=(pl.col("val").pipe(extract_number).pipe(scale_negative_even, n=5)), ... ) shape: (4, 2) ┌──────┬──────┐ @@ -1509,10 +1502,7 @@ def cum_sum(self, *, reverse: bool = False) -> Self: >>> df = pl.DataFrame({"values": [None, 10, None, 8, 9, None, 16, None]}) >>> df.with_columns( ... pl.col("values").cum_sum().alias("value_cum_sum"), - ... pl.col("values") - ... .cum_sum() - ... .forward_fill() - ... .alias("value_cum_sum_all_filled"), + ... pl.col("values").cum_sum().forward_fill().alias("value_cum_sum_all_filled"), ... ) shape: (8, 3) ┌────────┬───────────────┬──────────────────────────┐ @@ -2288,9 +2278,7 @@ def sort_by( When sorting in a group by context, the groups are sorted. - >>> df.group_by("group").agg( - ... pl.col("value1").sort_by("value2") - ... ) # doctest: +IGNORE_RESULT + >>> df.group_by("group").agg(pl.col("value1").sort_by("value2")) # doctest: +IGNORE_RESULT shape: (2, 2) ┌───────┬───────────┐ │ group ┆ value1 │ @@ -2304,9 +2292,7 @@ def sort_by( Take a single row from each group where a column attains its minimal value within that group. - >>> df.group_by("group").agg( - ... pl.all().sort_by("value2").first() - ... ) # doctest: +IGNORE_RESULT + >>> df.group_by("group").agg(pl.all().sort_by("value2").first()) # doctest: +IGNORE_RESULT shape: (2, 3) ┌───────┬────────┬────────┐ │ group ┆ value1 ┆ value2 | @@ -2362,9 +2348,7 @@ def gather( ... "value": [1, 98, 2, 3, 99, 4], ... } ... ) - >>> df.group_by("group", maintain_order=True).agg( - ... pl.col("value").gather([2, 1]) - ... ) + >>> df.group_by("group", maintain_order=True).agg(pl.col("value").gather([2, 1])) shape: (2, 2) ┌───────┬───────────┐ │ group ┆ value │ @@ -3657,9 +3641,7 @@ def cut( Divide a column into three categories. >>> df = pl.DataFrame({"foo": [-2, -1, 0, 1, 2]}) - >>> df.with_columns( - ... pl.col("foo").cut([-1, 1], labels=["a", "b", "c"]).alias("cut") - ... ) + >>> df.with_columns(pl.col("foo").cut([-1, 1], labels=["a", "b", "c"]).alias("cut")) shape: (5, 2) ┌─────┬─────┐ │ foo ┆ cut │ @@ -3675,9 +3657,9 @@ def cut( Add both the category and the breakpoint. - >>> df.with_columns( - ... pl.col("foo").cut([-1, 1], include_breaks=True).alias("cut") - ... ).unnest("cut") + >>> df.with_columns(pl.col("foo").cut([-1, 1], include_breaks=True).alias("cut")).unnest( + ... "cut" + ... ) shape: (5, 3) ┌─────┬──────┬────────────┐ │ foo ┆ brk ┆ foo_bin │ @@ -3745,9 +3727,7 @@ def qcut( probabilities. >>> df = pl.DataFrame({"foo": [-2, -1, 0, 1, 2]}) - >>> df.with_columns( - ... pl.col("foo").qcut([0.25, 0.75], labels=["a", "b", "c"]).alias("qcut") - ... ) + >>> df.with_columns(pl.col("foo").qcut([0.25, 0.75], labels=["a", "b", "c"]).alias("qcut")) shape: (5, 2) ┌─────┬──────┐ │ foo ┆ qcut │ @@ -3764,9 +3744,7 @@ def qcut( Divide a column into two categories using uniform quantile probabilities. >>> df.with_columns( - ... pl.col("foo") - ... .qcut(2, labels=["low", "high"], left_closed=True) - ... .alias("qcut") + ... pl.col("foo").qcut(2, labels=["low", "high"], left_closed=True).alias("qcut") ... ) shape: (5, 2) ┌─────┬──────┐ @@ -4115,9 +4093,7 @@ def map_elements( In a GroupBy context, each element of the column is itself a Series: - >>> ( - ... df.lazy().group_by("b").agg(pl.col("a")).collect() - ... ) # doctest: +IGNORE_RESULT + >>> (df.lazy().group_by("b").agg(pl.col("a")).collect()) # doctest: +IGNORE_RESULT shape: (3, 2) ┌─────┬───────────┐ │ b ┆ a │ @@ -4132,10 +4108,7 @@ def map_elements( Therefore, from the user's point-of-view, the function is applied per-group: >>> ( - ... df.lazy() - ... .group_by("b") - ... .agg(pl.col("a").map_elements(lambda x: x.sum())) - ... .collect() + ... df.lazy().group_by("b").agg(pl.col("a").map_elements(lambda x: x.sum())).collect() ... ) # doctest: +IGNORE_RESULT shape: (3, 2) ┌─────┬─────┐ @@ -4151,10 +4124,7 @@ def map_elements( Tip: again, it is better to implement this with an expression: >>> ( - ... df.lazy() - ... .group_by("b", maintain_order=True) - ... .agg(pl.col("a").sum()) - ... .collect() + ... df.lazy().group_by("b", maintain_order=True).agg(pl.col("a").sum()).collect() ... ) # doctest: +IGNORE_RESULT Window function application using `over` will behave as a GroupBy @@ -4187,9 +4157,7 @@ def map_elements( >>> df.with_columns( ... scaled=(pl.col("val") * pl.col("val").count()).over("key"), - ... ).sort( - ... "key" - ... ) # doctest: +IGNORE_RESULT + ... ).sort("key") # doctest: +IGNORE_RESULT """ # input x: Series of type list containing the group values @@ -4878,9 +4846,7 @@ def add(self, other: Any) -> Self: │ 5 ┆ 7 ┆ 125 │ └─────┴───────┴────────┘ - >>> df = pl.DataFrame( - ... {"x": ["a", "d", "g"], "y": ["b", "e", "h"], "z": ["c", "f", "i"]} - ... ) + >>> df = pl.DataFrame({"x": ["a", "d", "g"], "y": ["b", "e", "h"], "z": ["c", "f", "i"]}) >>> df.with_columns(pl.col("x").add(pl.col("y")).add(pl.col("z")).alias("xyz")) shape: (3, 4) ┌─────┬─────┬─────┬─────┐ @@ -5047,9 +5013,7 @@ def truediv(self, other: Any) -> Self: Examples -------- - >>> df = pl.DataFrame( - ... data={"x": [-2, -1, 0, 1, 2], "y": [0.5, 0.0, 0.0, -4.0, -0.5]} - ... ) + >>> df = pl.DataFrame(data={"x": [-2, -1, 0, 1, 2], "y": [0.5, 0.0, 0.0, -4.0, -0.5]}) >>> df.with_columns( ... pl.col("x").truediv(2).alias("x/2"), ... pl.col("x").truediv(pl.col("y")).alias("x/y"), @@ -5113,9 +5077,7 @@ def xor(self, other: Any) -> Self: Examples -------- - >>> df = pl.DataFrame( - ... {"x": [True, False, True, False], "y": [True, True, False, False]} - ... ) + >>> df = pl.DataFrame({"x": [True, False, True, False], "y": [True, True, False, False]}) >>> df.with_columns(pl.col("x").xor(pl.col("y")).alias("x ^ y")) shape: (4, 3) ┌───────┬───────┬───────┐ @@ -5140,10 +5102,7 @@ def xor(self, other: Any) -> Self: ... pl.col("x").map_elements(binary_string).alias("bin_x"), ... pl.col("y").map_elements(binary_string).alias("bin_y"), ... pl.col("x").xor(pl.col("y")).alias("xor_xy"), - ... pl.col("x") - ... .xor(pl.col("y")) - ... .map_elements(binary_string) - ... .alias("bin_xor_xy"), + ... pl.col("x").xor(pl.col("y")).map_elements(binary_string).alias("bin_xor_xy"), ... ) shape: (4, 6) ┌─────┬─────┬──────────┬──────────┬────────┬────────────┐ @@ -5176,9 +5135,7 @@ def is_in(self, other: Expr | Collection[Any] | Series) -> Self: Examples -------- - >>> df = pl.DataFrame( - ... {"sets": [[1, 2, 3], [1, 2], [9, 10]], "optional_members": [1, 2, 3]} - ... ) + >>> df = pl.DataFrame({"sets": [[1, 2, 3], [1, 2], [9, 10]], "optional_members": [1, 2, 3]}) >>> df.with_columns(contains=pl.col("optional_members").is_in("sets")) shape: (3, 3) ┌───────────┬──────────────────┬──────────┐ @@ -5288,9 +5245,7 @@ def is_between( Use the `closed` argument to include or exclude the values at the bounds: - >>> df.with_columns( - ... pl.col("num").is_between(2, 4, closed="left").alias("is_between") - ... ) + >>> df.with_columns(pl.col("num").is_between(2, 4, closed="left").alias("is_between")) shape: (5, 2) ┌─────┬────────────┐ │ num ┆ is_between │ @@ -5310,9 +5265,7 @@ def is_between( >>> df = pl.DataFrame({"a": ["a", "b", "c", "d", "e"]}) >>> df.with_columns( - ... pl.col("a") - ... .is_between(pl.lit("a"), pl.lit("c"), closed="both") - ... .alias("is_between") + ... pl.col("a").is_between(pl.lit("a"), pl.lit("c"), closed="both").alias("is_between") ... ) shape: (5, 2) ┌─────┬────────────┐ @@ -5525,9 +5478,9 @@ def interpolate(self, method: InterpolationMethod = "linear") -> Self: ... } ... ) # Interpolate from this to the new grid >>> df_new_grid = pl.DataFrame({"grid_points": range(1, 11)}) - >>> df_new_grid.join( - ... df_original_grid, on="grid_points", how="left" - ... ).with_columns(pl.col("values").interpolate()) + >>> df_new_grid.join(df_original_grid, on="grid_points", how="left").with_columns( + ... pl.col("values").interpolate() + ... ) shape: (10, 2) ┌─────────────┬────────┐ │ grid_points ┆ values │ @@ -5664,9 +5617,7 @@ def rolling_min( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_min=pl.col("A").rolling_min( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_min=pl.col("A").rolling_min(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬─────────────┐ @@ -5871,9 +5822,7 @@ def rolling_max( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_max=pl.col("A").rolling_max( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_max=pl.col("A").rolling_max(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬─────────────┐ @@ -6109,9 +6058,7 @@ def rolling_mean( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_mean=pl.col("A").rolling_mean( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_mean=pl.col("A").rolling_mean(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬──────────────┐ @@ -6349,9 +6296,7 @@ def rolling_sum( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_sum=pl.col("A").rolling_sum( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_sum=pl.col("A").rolling_sum(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬─────────────┐ @@ -6586,9 +6531,7 @@ def rolling_std( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_std=pl.col("A").rolling_std( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_std=pl.col("A").rolling_std(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬─────────────┐ @@ -6830,9 +6773,7 @@ def rolling_var( Specify weights to multiply the values in the window with: >>> df.with_columns( - ... rolling_var=pl.col("A").rolling_var( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_var=pl.col("A").rolling_var(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬─────────────┐ @@ -7071,9 +7012,7 @@ def rolling_median( Specify weights for the values in each window: >>> df.with_columns( - ... rolling_median=pl.col("A").rolling_median( - ... window_size=2, weights=[0.25, 0.75] - ... ), + ... rolling_median=pl.col("A").rolling_median(window_size=2, weights=[0.25, 0.75]), ... ) shape: (6, 2) ┌─────┬────────────────┐ @@ -7218,9 +7157,7 @@ def rolling_quantile( -------- >>> df = pl.DataFrame({"A": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]}) >>> df.with_columns( - ... rolling_quantile=pl.col("A").rolling_quantile( - ... quantile=0.25, window_size=4 - ... ), + ... rolling_quantile=pl.col("A").rolling_quantile(quantile=0.25, window_size=4), ... ) shape: (6, 2) ┌─────┬──────────────────┐ @@ -8720,9 +8657,7 @@ def value_counts(self, *, sort: bool = False, parallel: bool = False) -> Self: Examples -------- - >>> df = pl.DataFrame( - ... {"color": ["red", "blue", "red", "green", "blue", "blue"]} - ... ) + >>> df = pl.DataFrame({"color": ["red", "blue", "red", "green", "blue", "blue"]}) >>> df.select(pl.col("color").value_counts()) # doctest: +IGNORE_RESULT shape: (3, 1) ┌─────────────┐ @@ -8903,11 +8838,7 @@ def cumulative_eval( -------- >>> df = pl.DataFrame({"values": [1, 2, 3, 4, 5]}) >>> df.select( - ... [ - ... pl.col("values").cumulative_eval( - ... pl.element().first() - pl.element().last() ** 2 - ... ) - ... ] + ... [pl.col("values").cumulative_eval(pl.element().first() - pl.element().last() ** 2)] ... ) shape: (5, 1) ┌────────┐ @@ -9070,9 +9001,7 @@ def replace( ... None: "unspecified", ... } >>> df.with_columns( - ... pl.col("country_code") - ... .replace(country_code_map, default=None) - ... .alias("replaced") + ... pl.col("country_code").replace(country_code_map, default=None).alias("replaced") ... ) shape: (4, 2) ┌──────────────┬─────────────┐ @@ -9803,9 +9732,7 @@ def cat(self) -> ExprCatNameSpace: Examples -------- - >>> df = pl.DataFrame({"values": ["a", "b"]}).select( - ... pl.col("values").cast(pl.Categorical) - ... ) + >>> df = pl.DataFrame({"values": ["a", "b"]}).select(pl.col("values").cast(pl.Categorical)) >>> df.select(pl.col("values").cat.set_ordering(ordering="physical")) shape: (2, 1) ┌────────┐ diff --git a/py-polars/polars/expr/list.py b/py-polars/polars/expr/list.py index a6a7e0b46efd..0c300d50d0d9 100644 --- a/py-polars/polars/expr/list.py +++ b/py-polars/polars/expr/list.py @@ -600,9 +600,7 @@ def join(self, separator: IntoExpr) -> Expr: │ ["x", "y"] ┆ x y │ └─────────────────┴───────┘ - >>> df = pl.DataFrame( - ... {"s": [["a", "b", "c"], ["x", "y"]], "separator": ["*", "_"]} - ... ) + >>> df = pl.DataFrame({"s": [["a", "b", "c"], ["x", "y"]], "separator": ["*", "_"]}) >>> df.with_columns(join=pl.col("s").list.join(pl.col("separator"))) shape: (2, 3) ┌─────────────────┬───────────┬───────┐ @@ -1008,9 +1006,7 @@ def to_struct( Convert list to struct with default field name assignment: >>> df = pl.DataFrame({"n": [[0, 1], [0, 1, 2]]}) - >>> df.with_columns( - ... struct=pl.col("n").list.to_struct() - ... ) # doctest: +IGNORE_RESULT + >>> df.with_columns(struct=pl.col("n").list.to_struct()) # doctest: +IGNORE_RESULT shape: (2, 2) ┌───────────┬───────────┐ │ n ┆ struct │ @@ -1040,16 +1036,12 @@ def to_struct( Convert list to struct with field name assignment by function/index: >>> df = pl.DataFrame({"n": [[0, 1], [2, 3]]}) - >>> df.select(pl.col("n").list.to_struct(fields=lambda idx: f"n{idx}")).rows( - ... named=True - ... ) + >>> df.select(pl.col("n").list.to_struct(fields=lambda idx: f"n{idx}")).rows(named=True) [{'n': {'n0': 0, 'n1': 1}}, {'n': {'n0': 2, 'n1': 3}}] Convert list to struct with field name assignment by index from a list of names: - >>> df.select(pl.col("n").list.to_struct(fields=["one", "two"])).rows( - ... named=True - ... ) + >>> df.select(pl.col("n").list.to_struct(fields=["one", "two"])).rows(named=True) [{'n': {'one': 0, 'two': 1}}, {'n': {'one': 2, 'two': 3}}] """ @@ -1082,9 +1074,7 @@ def eval(self, expr: Expr, *, parallel: bool = False) -> Expr: Examples -------- >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) - >>> df.with_columns( - ... rank=pl.concat_list("a", "b").list.eval(pl.element().rank()) - ... ) + >>> df.with_columns(rank=pl.concat_list("a", "b").list.eval(pl.element().rank())) shape: (3, 3) ┌─────┬─────┬────────────┐ │ a ┆ b ┆ rank │ @@ -1116,9 +1106,7 @@ def set_union(self, other: IntoExpr) -> Expr: ... "b": [[2, 3, 4], [3], [3, 4, None], [6, 8]], ... } ... ) - >>> df.with_columns( - ... union=pl.col("a").list.set_union("b") - ... ) # doctest: +IGNORE_RESULT + >>> df.with_columns(union=pl.col("a").list.set_union("b")) # doctest: +IGNORE_RESULT shape: (4, 3) ┌───────────┬──────────────┬───────────────┐ │ a ┆ b ┆ union │ diff --git a/py-polars/polars/expr/name.py b/py-polars/polars/expr/name.py index 92439c566f65..acce5f0b4f47 100644 --- a/py-polars/polars/expr/name.py +++ b/py-polars/polars/expr/name.py @@ -91,9 +91,7 @@ def map(self, function: Callable[[str], str]) -> Expr: ... "B_reverse": ["z", "y", "x"], ... } ... ) - >>> df.with_columns( - ... pl.all().reverse().name.map(lambda c: c.rstrip("_reverse").lower()) - ... ) + >>> df.with_columns(pl.all().reverse().name.map(lambda c: c.rstrip("_reverse").lower())) shape: (3, 4) ┌───────────┬───────────┬─────┬─────┐ │ A_reverse ┆ B_reverse ┆ a ┆ b │ diff --git a/py-polars/polars/expr/string.py b/py-polars/polars/expr/string.py index 3b8e2e0adfd8..eaf081e7ce81 100644 --- a/py-polars/polars/expr/string.py +++ b/py-polars/polars/expr/string.py @@ -545,9 +545,7 @@ def to_titlecase(self) -> Expr: Examples -------- - >>> df = pl.DataFrame( - ... {"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]} - ... ) + >>> df = pl.DataFrame({"sing": ["welcome to my world", "THERE'S NO TURNING BACK"]}) >>> df.with_columns(foo_title=pl.col("sing").str.to_titlecase()) shape: (2, 2) ┌─────────────────────────┬─────────────────────────┐ @@ -1189,9 +1187,7 @@ def json_decode( Examples -------- - >>> df = pl.DataFrame( - ... {"json": ['{"a":1, "b": true}', None, '{"a":2, "b": false}']} - ... ) + >>> df = pl.DataFrame({"json": ['{"a":1, "b": true}', None, '{"a":2, "b": false}']}) >>> dtype = pl.Struct([pl.Field("a", pl.Int64), pl.Field("b", pl.Boolean)]) >>> df.with_columns(decoded=pl.col("json").str.json_decode(dtype)) shape: (3, 2) @@ -1505,9 +1501,9 @@ def extract_groups(self, pattern: str) -> Expr: >>> df = pl.DataFrame({"col": ["foo bar baz"]}) >>> ( - ... df.with_columns( - ... pl.col("col").str.extract_groups(r"(\S+) (\S+) (.+)") - ... ).select(pl.col("col").struct["2"], pl.col("col").struct["3"]) + ... df.with_columns(pl.col("col").str.extract_groups(r"(\S+) (\S+) (.+)")).select( + ... pl.col("col").struct["2"], pl.col("col").struct["3"] + ... ) ... ) shape: (1, 2) ┌─────┬─────┐ @@ -1536,9 +1532,7 @@ def extract_groups(self, pattern: str) -> Expr: ... } ... ) >>> pattern = r"candidate=(?\w+)&ref=(?\w+)" - >>> df.select(captures=pl.col("url").str.extract_groups(pattern)).unnest( - ... "captures" - ... ) + >>> df.select(captures=pl.col("url").str.extract_groups(pattern)).unnest("captures") shape: (3, 2) ┌───────────┬────────┐ │ candidate ┆ ref │ @@ -1554,9 +1548,9 @@ def extract_groups(self, pattern: str) -> Expr: >>> pattern = r"candidate=(\w+)&ref=(\w+)" >>> ( - ... df.with_columns( - ... captures=pl.col("url").str.extract_groups(pattern) - ... ).with_columns(name=pl.col("captures").struct["1"].str.to_uppercase()) + ... df.with_columns(captures=pl.col("url").str.extract_groups(pattern)).with_columns( + ... name=pl.col("captures").struct["1"].str.to_uppercase() + ... ) ... ) shape: (3, 3) ┌───────────────────────────────────┬───────────────────────┬──────────┐ @@ -1610,9 +1604,7 @@ def count_matches(self, pattern: str | Expr, *, literal: bool = False) -> Expr: >>> df = pl.DataFrame({"bar": ["12 dbc 3xy", "cat\\w", "1zy3\\d\\d", None]}) >>> df.with_columns( - ... pl.col("bar") - ... .str.count_matches(r"\d", literal=True) - ... .alias("count_digits"), + ... pl.col("bar").str.count_matches(r"\d", literal=True).alias("count_digits"), ... ) shape: (4, 2) ┌────────────┬──────────────┐ @@ -1659,14 +1651,10 @@ def split(self, by: IntoExpr, *, inclusive: bool = False) -> Expr: │ foo_bar_baz ┆ ["foo", "bar", "baz"] ┆ ["foo_", "bar_", "baz"] │ └─────────────┴───────────────────────┴─────────────────────────┘ - >>> df = pl.DataFrame( - ... {"s": ["foo^bar", "foo_bar", "foo*bar*baz"], "by": ["_", "_", "*"]} - ... ) + >>> df = pl.DataFrame({"s": ["foo^bar", "foo_bar", "foo*bar*baz"], "by": ["_", "_", "*"]}) >>> df.with_columns( ... pl.col("s").str.split(by=pl.col("by")).alias("split"), - ... pl.col("s") - ... .str.split(by=pl.col("by"), inclusive=True) - ... .alias("split_inclusive"), + ... pl.col("s").str.split(by=pl.col("by"), inclusive=True).alias("split_inclusive"), ... ) shape: (3, 4) ┌─────────────┬─────┬───────────────────────┬─────────────────────────┐ @@ -1887,9 +1875,7 @@ def replace( Examples -------- >>> df = pl.DataFrame({"id": [1, 2], "text": ["123abc", "abc456"]}) - >>> df.with_columns( - ... pl.col("text").str.replace(r"abc\b", "ABC") - ... ) # doctest: +IGNORE_RESULT + >>> df.with_columns(pl.col("text").str.replace(r"abc\b", "ABC")) # doctest: +IGNORE_RESULT shape: (2, 2) ┌─────┬────────┐ │ id ┆ text │ diff --git a/py-polars/polars/expr/struct.py b/py-polars/polars/expr/struct.py index ac50c551e538..70413410f6ac 100644 --- a/py-polars/polars/expr/struct.py +++ b/py-polars/polars/expr/struct.py @@ -129,9 +129,7 @@ def rename_fields(self, names: Sequence[str]) -> Expr: Rename fields: - >>> df = df.select( - ... pl.col("struct_col").struct.rename_fields(["www", "xxx", "yyy", "zzz"]) - ... ) + >>> df = df.select(pl.col("struct_col").struct.rename_fields(["www", "xxx", "yyy", "zzz"])) >>> df.unnest("struct_col") shape: (2, 4) ┌─────┬─────┬──────┬───────────┐ diff --git a/py-polars/polars/functions/as_datatype.py b/py-polars/polars/functions/as_datatype.py index 0b328b0ad1ba..17069f7fddb7 100644 --- a/py-polars/polars/functions/as_datatype.py +++ b/py-polars/polars/functions/as_datatype.py @@ -252,7 +252,6 @@ def duration( ... (pl.col("dt") + pl.duration(milliseconds="add")).alias("add_millis"), ... (pl.col("dt") + pl.duration(hours="add")).alias("add_hours"), ... ) - ... shape: (2, 5) ┌─────────────────────┬─────────────────────┬─────────────────────┬─────────────────────────┬─────────────────────┐ │ add_weeks ┆ add_days ┆ add_seconds ┆ add_millis ┆ add_hours │ @@ -267,17 +266,10 @@ def duration( >>> with pl.Config(tbl_width_chars=120): ... df.select( - ... add_calendar_days=pl.col("dt").dt.offset_by( - ... pl.format("{}d", pl.col("add")) - ... ), - ... add_calendar_months=pl.col("dt").dt.offset_by( - ... pl.format("{}mo", pl.col("add")) - ... ), - ... add_calendar_years=pl.col("dt").dt.offset_by( - ... pl.format("{}y", pl.col("add")) - ... ), + ... add_calendar_days=pl.col("dt").dt.offset_by(pl.format("{}d", pl.col("add"))), + ... add_calendar_months=pl.col("dt").dt.offset_by(pl.format("{}mo", pl.col("add"))), + ... add_calendar_years=pl.col("dt").dt.offset_by(pl.format("{}y", pl.col("add"))), ... ) - ... shape: (2, 3) ┌─────────────────────┬─────────────────────┬─────────────────────┐ │ add_calendar_days ┆ add_calendar_months ┆ add_calendar_years │ @@ -343,9 +335,7 @@ def concat_list(exprs: IntoExpr | Iterable[IntoExpr], *more_exprs: IntoExpr) -> >>> df = pl.DataFrame({"A": [1.0, 2.0, 9.0, 2.0, 13.0]}) >>> df = df.select([pl.col("A").shift(i).alias(f"A_lag_{i}") for i in range(3)]) - >>> df.select( - ... pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling") - ... ) + >>> df.select(pl.concat_list([f"A_lag_{i}" for i in range(3)][::-1]).alias("A_rolling")) shape: (5, 1) ┌───────────────────┐ │ A_rolling │ diff --git a/py-polars/polars/functions/eager.py b/py-polars/polars/functions/eager.py index f3a388028947..8a712253d7c9 100644 --- a/py-polars/polars/functions/eager.py +++ b/py-polars/polars/functions/eager.py @@ -324,9 +324,7 @@ def align_frames( Align frames by the "dt" column: - >>> af1, af2, af3 = pl.align_frames( - ... df1, df2, df3, on="dt" - ... ) # doctest: +IGNORE_RESULT + >>> af1, af2, af3 = pl.align_frames(df1, df2, df3, on="dt") # doctest: +IGNORE_RESULT # # df1 df2 df3 # shape: (3, 3) shape: (3, 3) shape: (3, 3) diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index 93c762f5d1fb..ce5a82dd34f8 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -44,9 +44,7 @@ def element() -> Expr: A horizontal rank computation by taking the elements of a list >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) - >>> df.with_columns( - ... pl.concat_list(["a", "b"]).list.eval(pl.element().rank()).alias("rank") - ... ) + >>> df.with_columns(pl.concat_list(["a", "b"]).list.eval(pl.element().rank()).alias("rank")) shape: (3, 3) ┌─────┬─────┬────────────┐ │ a ┆ b ┆ rank │ @@ -806,7 +804,6 @@ def map_batches( -------- >>> def test_func(a, b, c): ... return a + b + c - ... >>> df = pl.DataFrame( ... { ... "a": [1, 2, 3, 4], @@ -926,8 +923,7 @@ def map_groups( ... df.group_by("group").agg( ... pl.map_groups( ... exprs=["a", "b"], - ... function=lambda list_of_series: list_of_series[0] - ... / list_of_series[0].sum() + ... function=lambda list_of_series: list_of_series[0] / list_of_series[0].sum() ... + list_of_series[1], ... ).alias("my_custom_aggregation") ... ) @@ -1042,9 +1038,9 @@ def fold( Horizontally sum over all columns and add 1. >>> df.select( - ... pl.fold( - ... acc=pl.lit(1), function=lambda acc, x: acc + x, exprs=pl.col("*") - ... ).alias("sum"), + ... pl.fold(acc=pl.lit(1), function=lambda acc, x: acc + x, exprs=pl.col("*")).alias( + ... "sum" + ... ), ... ) shape: (3, 1) ┌─────┐ @@ -1143,9 +1139,7 @@ def reduce( Horizontally sum over all columns. - >>> df.select( - ... pl.reduce(function=lambda acc, x: acc + x, exprs=pl.col("*")).alias("sum") - ... ) + >>> df.select(pl.reduce(function=lambda acc, x: acc + x, exprs=pl.col("*")).alias("sum")) shape: (3, 1) ┌─────┐ │ sum │ @@ -1299,9 +1293,7 @@ def arctan2(y: str | Expr, x: str | Expr) -> Expr: ... "x": [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo], ... } ... ) - >>> df.select( - ... pl.arctan2d("y", "x").alias("atan2d"), pl.arctan2("y", "x").alias("atan2") - ... ) + >>> df.select(pl.arctan2d("y", "x").alias("atan2d"), pl.arctan2("y", "x").alias("atan2")) shape: (4, 2) ┌────────┬───────────┐ │ atan2d ┆ atan2 │ @@ -1346,9 +1338,7 @@ def arctan2d(y: str | Expr, x: str | Expr) -> Expr: ... "x": [twoRootTwo, twoRootTwo, -twoRootTwo, -twoRootTwo], ... } ... ) - >>> df.select( - ... pl.arctan2d("y", "x").alias("atan2d"), pl.arctan2("y", "x").alias("atan2") - ... ) + >>> df.select(pl.arctan2d("y", "x").alias("atan2d"), pl.arctan2("y", "x").alias("atan2")) shape: (4, 2) ┌────────┬───────────┐ │ atan2d ┆ atan2 │ diff --git a/py-polars/polars/io/csv/functions.py b/py-polars/polars/io/csv/functions.py index cf0e93522512..ebd6fd9bb88c 100644 --- a/py-polars/polars/io/csv/functions.py +++ b/py-polars/polars/io/csv/functions.py @@ -550,7 +550,6 @@ def read_csv_batched( >>> batches = reader.next_batches(5) # doctest: +SKIP >>> for df in batches: # doctest: +SKIP ... print(df) - ... Read big CSV file in batches and write a CSV file for each "group" of interest. @@ -571,7 +570,6 @@ def read_csv_batched( ... seen_groups.add(group) ... ... batches = reader.next_batches(100) - ... """ projection, columns = handle_projection_columns(columns) @@ -838,9 +836,7 @@ def scan_csv( >>> >>> ( ... pl.scan_csv("my_long_file.csv") # lazy, doesn't do a thing - ... .select( - ... ["a", "c"] - ... ) # select only 2 columns (other columns will not be read) + ... .select(["a", "c"]) # select only 2 columns (other columns will not be read) ... .filter( ... pl.col("a") > 10 ... ) # the filter is pushed down the scan, so less data is read into memory @@ -849,9 +845,7 @@ def scan_csv( We can use `with_column_names` to modify the header before scanning: - >>> df = pl.DataFrame( - ... {"BrEeZaH": [1, 2, 3, 4], "LaNgUaGe": ["is", "hard", "to", "read"]} - ... ) + >>> df = pl.DataFrame({"BrEeZaH": [1, 2, 3, 4], "LaNgUaGe": ["is", "hard", "to", "read"]}) >>> path: pathlib.Path = dirpath / "mydf.csv" >>> df.write_csv(path) >>> pl.scan_csv( diff --git a/py-polars/polars/io/delta.py b/py-polars/polars/io/delta.py index 8521bd05fef4..7cc3be357e17 100644 --- a/py-polars/polars/io/delta.py +++ b/py-polars/polars/io/delta.py @@ -122,9 +122,7 @@ def read_delta( >>> table_path = "/path/to/delta-table/" >>> delta_table_options = {"without_files": True} - >>> pl.read_delta( - ... table_path, delta_table_options=delta_table_options - ... ) # doctest: +SKIP + >>> pl.read_delta(table_path, delta_table_options=delta_table_options) # doctest: +SKIP """ if pyarrow_options is None: @@ -212,9 +210,7 @@ def scan_delta( ... "AWS_ACCESS_KEY_ID": "THE_AWS_ACCESS_KEY_ID", ... "AWS_SECRET_ACCESS_KEY": "THE_AWS_SECRET_ACCESS_KEY", ... } - >>> pl.scan_delta( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_delta(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for a Delta table from Google Cloud storage (GCS). See a list of supported storage options for GCS `here @@ -222,9 +218,7 @@ def scan_delta( >>> table_path = "gs://bucket/path/to/delta-table/" >>> storage_options = {"SERVICE_ACCOUNT": "SERVICE_ACCOUNT_JSON_ABSOLUTE_PATH"} - >>> pl.scan_delta( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_delta(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for a Delta table from Azure. Supported options for Azure are available `here @@ -241,9 +235,7 @@ def scan_delta( ... "AZURE_STORAGE_ACCOUNT_NAME": "AZURE_STORAGE_ACCOUNT_NAME", ... "AZURE_STORAGE_ACCOUNT_KEY": "AZURE_STORAGE_ACCOUNT_KEY", ... } - >>> pl.scan_delta( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_delta(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for a Delta table with additional delta specific options. In the below example, `without_files` option is used which loads the table without diff --git a/py-polars/polars/io/iceberg.py b/py-polars/polars/io/iceberg.py index 6439fad1bef8..70e8b9032385 100644 --- a/py-polars/polars/io/iceberg.py +++ b/py-polars/polars/io/iceberg.py @@ -81,9 +81,7 @@ def scan_iceberg( ... "s3.access-key-id": "THE_AWS_ACCESS_KEY_ID", ... "s3.secret-access-key": "THE_AWS_SECRET_ACCESS_KEY", ... } - >>> pl.scan_iceberg( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_iceberg(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for an Iceberg table from Azure. Supported options for Azure are available `here @@ -100,9 +98,7 @@ def scan_iceberg( ... "adlfs.account-name": "AZURE_STORAGE_ACCOUNT_NAME", ... "adlfs.account-key": "AZURE_STORAGE_ACCOUNT_KEY", ... } - >>> pl.scan_iceberg( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_iceberg(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for an Iceberg table from Google Cloud Storage. Supported options for GCS are available `here @@ -113,9 +109,7 @@ def scan_iceberg( ... "gcs.project-id": "my-gcp-project", ... "gcs.oauth.token": "ya29.dr.AfM...", ... } - >>> pl.scan_iceberg( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_iceberg(table_path, storage_options=storage_options).collect() # doctest: +SKIP Creates a scan for an Iceberg table with additional options. In the below example, `without_files` option is used which loads the table without @@ -123,9 +117,7 @@ def scan_iceberg( >>> table_path = "/path/to/iceberg-table/metadata.json" >>> storage_options = {"py-io-impl": "pyiceberg.io.fsspec.FsspecFileIO"} - >>> pl.scan_iceberg( - ... table_path, storage_options=storage_options - ... ).collect() # doctest: +SKIP + >>> pl.scan_iceberg(table_path, storage_options=storage_options).collect() # doctest: +SKIP """ from pyiceberg.io.pyarrow import schema_to_pyarrow diff --git a/py-polars/polars/lazyframe/frame.py b/py-polars/polars/lazyframe/frame.py index 2ed0ea8f1753..16e7d0ca62a4 100644 --- a/py-polars/polars/lazyframe/frame.py +++ b/py-polars/polars/lazyframe/frame.py @@ -961,7 +961,6 @@ def pipe( -------- >>> def cast_str_to_int(data, col_name): ... return data.with_columns(pl.col(col_name).cast(pl.Int64)) - ... >>> lf = pl.LazyFrame( ... { ... "a": [1, 2, 3, 4], @@ -1749,9 +1748,7 @@ def collect( Collect in streaming mode - >>> lf.group_by("a").agg(pl.all().sum()).collect( - ... streaming=True - ... ) # doctest: +SKIP + >>> lf.group_by("a").agg(pl.all().sum()).collect(streaming=True) # doctest: +SKIP shape: (3, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -1902,11 +1899,8 @@ def collect_async( ... ) >>> async def main(): ... return await ( - ... lf.group_by("a", maintain_order=True) - ... .agg(pl.all().sum()) - ... .collect_async() + ... lf.group_by("a", maintain_order=True).agg(pl.all().sum()).collect_async() ... ) - ... >>> asyncio.run(main()) shape: (3, 3) ┌─────┬─────┬─────┐ @@ -2863,9 +2857,7 @@ def select( Use keyword arguments to easily name your expression inputs. - >>> lf.select( - ... threshold=pl.when(pl.col("foo") > 2).then(10).otherwise(0) - ... ).collect() + >>> lf.select(threshold=pl.when(pl.col("foo") > 2).then(10).otherwise(0)).collect() shape: (3, 1) ┌───────────┐ │ threshold │ @@ -2884,7 +2876,6 @@ def select( ... lf.select( ... is_odd=(pl.col(pl.INTEGER_DTYPES) % 2).name.suffix("_is_odd"), ... ).collect() - ... shape: (3, 1) ┌───────────┐ │ is_odd │ @@ -3014,9 +3005,7 @@ def group_by( Or use positional arguments to group by multiple columns in the same way. Expressions are also accepted. - >>> lf.group_by("a", pl.col("b") // 2).agg( - ... pl.col("c").mean() - ... ).collect() # doctest: +SKIP + >>> lf.group_by("a", pl.col("b") // 2).agg(pl.col("c").mean()).collect() # doctest: +SKIP shape: (3, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -3367,9 +3356,7 @@ def group_by_dynamic( Group by windows of 1 hour starting at 2021-12-16 00:00:00. - >>> lf.group_by_dynamic("time", every="1h", closed="right").agg( - ... pl.col("n") - ... ).collect() + >>> lf.group_by_dynamic("time", every="1h", closed="right").agg(pl.col("n")).collect() shape: (4, 2) ┌─────────────────────┬───────────┐ │ time ┆ n │ @@ -3384,9 +3371,9 @@ def group_by_dynamic( The window boundaries can also be added to the aggregation result - >>> lf.group_by_dynamic( - ... "time", every="1h", include_boundaries=True, closed="right" - ... ).agg(pl.col("n").mean()).collect() + >>> lf.group_by_dynamic("time", every="1h", include_boundaries=True, closed="right").agg( + ... pl.col("n").mean() + ... ).collect() shape: (4, 4) ┌─────────────────────┬─────────────────────┬─────────────────────┬─────┐ │ _lower_boundary ┆ _upper_boundary ┆ time ┆ n │ @@ -3402,9 +3389,7 @@ def group_by_dynamic( When closed="left", the window excludes the right end of interval: [lower_bound, upper_bound) - >>> lf.group_by_dynamic("time", every="1h", closed="left").agg( - ... pl.col("n") - ... ).collect() + >>> lf.group_by_dynamic("time", every="1h", closed="left").agg(pl.col("n")).collect() shape: (4, 2) ┌─────────────────────┬───────────┐ │ time ┆ n │ @@ -3419,9 +3404,7 @@ def group_by_dynamic( When closed="both" the time values at the window boundaries belong to 2 groups. - >>> lf.group_by_dynamic("time", every="1h", closed="both").agg( - ... pl.col("n") - ... ).collect() + >>> lf.group_by_dynamic("time", every="1h", closed="both").agg(pl.col("n")).collect() shape: (5, 2) ┌─────────────────────┬───────────┐ │ time ┆ n │ @@ -4035,7 +4018,6 @@ def with_columns( ... lf.drop("c").with_columns( ... diffs=pl.col(["a", "b"]).diff().name.suffix("_diff"), ... ).collect() - ... shape: (4, 3) ┌─────┬──────┬─────────────┐ │ a ┆ b ┆ diffs │ @@ -4112,9 +4094,7 @@ def with_context(self, other: Self | list[Self]) -> Self: -------- >>> lf = pl.LazyFrame({"a": [1, 2, 3], "b": ["a", "c", None]}) >>> lf_other = pl.LazyFrame({"c": ["foo", "ham"]}) - >>> lf.with_context(lf_other).select( - ... pl.col("b") + pl.col("c").first() - ... ).collect() + >>> lf.with_context(lf_other).select(pl.col("b") + pl.col("c").first()).collect() shape: (3, 1) ┌──────┐ │ b │ @@ -4128,15 +4108,9 @@ def with_context(self, other: Self | list[Self]) -> Self: Fill nulls with the median from another DataFrame: - >>> train_lf = pl.LazyFrame( - ... {"feature_0": [-1.0, 0, 1], "feature_1": [-1.0, 0, 1]} - ... ) - >>> test_lf = pl.LazyFrame( - ... {"feature_0": [-1.0, None, 1], "feature_1": [-1.0, 0, 1]} - ... ) - >>> test_lf.with_context( - ... train_lf.select(pl.all().name.suffix("_train")) - ... ).select( + >>> train_lf = pl.LazyFrame({"feature_0": [-1.0, 0, 1], "feature_1": [-1.0, 0, 1]}) + >>> test_lf = pl.LazyFrame({"feature_0": [-1.0, None, 1], "feature_1": [-1.0, 0, 1]}) + >>> test_lf.with_context(train_lf.select(pl.all().name.suffix("_train"))).select( ... pl.col("feature_0").fill_null(pl.col("feature_0_train").median()) ... ).collect() shape: (3, 1) @@ -5655,9 +5629,7 @@ def merge_sorted(self, other: LazyFrame, key: str) -> Self: Examples -------- - >>> df0 = pl.LazyFrame( - ... {"name": ["steve", "elise", "bob"], "age": [42, 44, 18]} - ... ).sort("age") + >>> df0 = pl.LazyFrame({"name": ["steve", "elise", "bob"], "age": [42, 44, 18]}).sort("age") >>> df0.collect() shape: (3, 2) ┌───────┬─────┐ @@ -5841,9 +5813,7 @@ def update( Update `df` values including null values in `new_df`, using an outer join strategy that defines explicit join columns in each frame: - >>> lf.update( - ... new_lf, left_on="A", right_on="C", how="outer", include_nulls=True - ... ).collect() + >>> lf.update(new_lf, left_on="A", right_on="C", how="outer", include_nulls=True).collect() shape: (5, 2) ┌─────┬──────┐ │ A ┆ B │ diff --git a/py-polars/polars/lazyframe/group_by.py b/py-polars/polars/lazyframe/group_by.py index 8042b428dc8b..faea120ab682 100644 --- a/py-polars/polars/lazyframe/group_by.py +++ b/py-polars/polars/lazyframe/group_by.py @@ -52,9 +52,7 @@ def agg( ... "c": [5, 4, 3, 2, 1], ... } ... ).lazy() - >>> ldf.group_by("a").agg( - ... [pl.col("b"), pl.col("c")] - ... ).collect() # doctest: +IGNORE_RESULT + >>> ldf.group_by("a").agg([pl.col("b"), pl.col("c")]).collect() # doctest: +IGNORE_RESULT shape: (3, 3) ┌─────┬───────────┬───────────┐ │ a ┆ b ┆ c │ @@ -70,9 +68,7 @@ def agg( Compute the sum of a column for each group. - >>> ldf.group_by("a").agg( - ... pl.col("b").sum() - ... ).collect() # doctest: +IGNORE_RESULT + >>> ldf.group_by("a").agg(pl.col("b").sum()).collect() # doctest: +IGNORE_RESULT shape: (3, 2) ┌─────┬─────┐ │ a ┆ b │ @@ -86,9 +82,7 @@ def agg( Compute multiple aggregates at once by passing a list of expressions. - >>> ldf.group_by("a").agg( - ... [pl.sum("b"), pl.mean("c")] - ... ).collect() # doctest: +IGNORE_RESULT + >>> ldf.group_by("a").agg([pl.sum("b"), pl.mean("c")]).collect() # doctest: +IGNORE_RESULT shape: (3, 3) ┌─────┬─────┬─────┐ │ a ┆ b ┆ c │ @@ -209,9 +203,7 @@ def map_groups( It is better to implement this with an expression: >>> ( - ... df.lazy() - ... .filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2) - ... .collect() + ... df.lazy().filter(pl.int_range(0, pl.count()).shuffle().over("color") < 2).collect() ... ) # doctest: +IGNORE_RESULT """ diff --git a/py-polars/polars/selectors.py b/py-polars/polars/selectors.py index 2e6996f0bf39..7b3d686561e7 100644 --- a/py-polars/polars/selectors.py +++ b/py-polars/polars/selectors.py @@ -1650,7 +1650,6 @@ def object() -> SelectorType: ... schema_overrides={"idx": pl.Int32}, ... ) ... print(df) # doctest: +IGNORE_RESULT - ... shape: (2, 3) ┌─────┬──────────────────────────────────────┬──────────────────────────────────────┐ │ idx ┆ uuid_obj ┆ uuid_str │ diff --git a/py-polars/polars/series/array.py b/py-polars/polars/series/array.py index 855827231172..3ac967a6970f 100644 --- a/py-polars/polars/series/array.py +++ b/py-polars/polars/series/array.py @@ -24,9 +24,7 @@ def min(self) -> Series: Examples -------- - >>> s = pl.Series( - ... "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2) - ... ) + >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)) >>> s.arr.min() shape: (2,) Series: 'a' [i64] @@ -43,9 +41,7 @@ def max(self) -> Series: Examples -------- - >>> s = pl.Series( - ... "a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2) - ... ) + >>> s = pl.Series("a", [[1, 2], [4, 3]], dtype=pl.Array(inner=pl.Int64, width=2)) >>> s.arr.max() shape: (2,) Series: 'a' [i64] diff --git a/py-polars/polars/series/categorical.py b/py-polars/polars/series/categorical.py index 62f68aac4d6e..daa8b32f6dd5 100644 --- a/py-polars/polars/series/categorical.py +++ b/py-polars/polars/series/categorical.py @@ -92,7 +92,6 @@ def is_local(self) -> bool: >>> with pl.StringCache(): ... s = pl.Series(["a", "b", "a"], dtype=pl.Categorical) - ... >>> s.cat.is_local() False @@ -115,7 +114,6 @@ def to_local(self) -> Series: >>> with pl.StringCache(): ... _ = pl.Series("x", ["a", "b", "a"], dtype=pl.Categorical) ... s = pl.Series("y", ["c", "b", "d"], dtype=pl.Categorical) - ... >>> s.to_physical() shape: (3,) Series: 'y' [u32] diff --git a/py-polars/polars/series/datetime.py b/py-polars/polars/series/datetime.py index 17feaa9fa838..828c29161c6d 100644 --- a/py-polars/polars/series/datetime.py +++ b/py-polars/polars/series/datetime.py @@ -64,9 +64,7 @@ def median(self) -> dt.date | dt.datetime | dt.timedelta | None: Examples -------- >>> from datetime import datetime - >>> date = pl.datetime_range( - ... datetime(2001, 1, 1), datetime(2001, 1, 3), "1d", eager=True - ... ) + >>> date = pl.datetime_range(datetime(2001, 1, 1), datetime(2001, 1, 3), "1d", eager=True) >>> date shape: (3,) Series: 'datetime' [datetime[μs]] @@ -95,9 +93,7 @@ def mean(self) -> dt.date | dt.datetime | None: Examples -------- >>> from datetime import datetime - >>> s = pl.Series( - ... [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 3)] - ... ) + >>> s = pl.Series([datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 3)]) >>> s.dt.mean() datetime.datetime(2001, 1, 2, 0, 0) @@ -223,9 +219,7 @@ def is_leap_year(self) -> Series: Examples -------- >>> from datetime import date - >>> s = pl.Series( - ... "date", [date(2000, 1, 1), date(2001, 1, 1), date(2002, 1, 1)] - ... ) + >>> s = pl.Series("date", [date(2000, 1, 1), date(2001, 1, 1), date(2002, 1, 1)]) >>> s.dt.is_leap_year() shape: (3,) Series: 'date' [bool] @@ -280,9 +274,7 @@ def quarter(self) -> Series: Examples -------- >>> from datetime import date - >>> date = pl.date_range( - ... date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True - ... ) + >>> date = pl.date_range(date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True) >>> date.dt.quarter() shape: (4,) Series: 'date' [u32] @@ -312,9 +304,7 @@ def month(self) -> Series: Examples -------- >>> from datetime import date - >>> date = pl.date_range( - ... date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True - ... ) + >>> date = pl.date_range(date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True) >>> date.dt.month() shape: (4,) Series: 'date' [u32] @@ -344,9 +334,7 @@ def week(self) -> Series: Examples -------- >>> from datetime import date - >>> date = pl.date_range( - ... date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True - ... ) + >>> date = pl.date_range(date(2001, 1, 1), date(2001, 4, 1), interval="1mo", eager=True) >>> date.dt.week() shape: (4,) Series: 'date' [u32] @@ -408,9 +396,7 @@ def day(self) -> Series: Examples -------- >>> from datetime import date - >>> s = pl.date_range( - ... date(2001, 1, 1), date(2001, 1, 9), interval="2d", eager=True - ... ) + >>> s = pl.date_range(date(2001, 1, 1), date(2001, 1, 9), interval="2d", eager=True) >>> s.dt.day() shape: (5,) Series: 'date' [u32] @@ -441,9 +427,7 @@ def ordinal_day(self) -> Series: Examples -------- >>> from datetime import date - >>> s = pl.date_range( - ... date(2001, 1, 1), date(2001, 3, 1), interval="1mo", eager=True - ... ) + >>> s = pl.date_range(date(2001, 1, 1), date(2001, 3, 1), interval="1mo", eager=True) >>> s.dt.ordinal_day() shape: (3,) Series: 'date' [u32] @@ -469,9 +453,7 @@ def time(self) -> Series: Examples -------- >>> from datetime import datetime - >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone( - ... "Asia/Kathmandu" - ... ) + >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone("Asia/Kathmandu") >>> ser shape: (1,) Series: '' [datetime[μs, Asia/Kathmandu]] @@ -500,9 +482,7 @@ def date(self) -> Series: Examples -------- >>> from datetime import datetime - >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone( - ... "Asia/Kathmandu" - ... ) + >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone("Asia/Kathmandu") >>> ser shape: (1,) Series: '' [datetime[μs, Asia/Kathmandu]] @@ -531,9 +511,7 @@ def datetime(self) -> Series: Examples -------- >>> from datetime import datetime - >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone( - ... "Asia/Kathmandu" - ... ) + >>> ser = pl.Series([datetime(2021, 1, 2, 5)]).dt.replace_time_zone("Asia/Kathmandu") >>> ser shape: (1,) Series: '' [datetime[μs, Asia/Kathmandu]] @@ -1110,9 +1088,7 @@ def total_days(self) -> Series: Examples -------- >>> from datetime import datetime - >>> date = pl.datetime_range( - ... datetime(2020, 3, 1), datetime(2020, 5, 1), "1mo", eager=True - ... ) + >>> date = pl.datetime_range(datetime(2020, 3, 1), datetime(2020, 5, 1), "1mo", eager=True) >>> date shape: (3,) Series: 'datetime' [datetime[μs]] @@ -1144,9 +1120,7 @@ def total_hours(self) -> Series: Examples -------- >>> from datetime import datetime - >>> date = pl.datetime_range( - ... datetime(2020, 1, 1), datetime(2020, 1, 4), "1d", eager=True - ... ) + >>> date = pl.datetime_range(datetime(2020, 1, 1), datetime(2020, 1, 4), "1d", eager=True) >>> date shape: (4,) Series: 'datetime' [datetime[μs]] @@ -1180,9 +1154,7 @@ def total_minutes(self) -> Series: Examples -------- >>> from datetime import datetime - >>> date = pl.datetime_range( - ... datetime(2020, 1, 1), datetime(2020, 1, 4), "1d", eager=True - ... ) + >>> date = pl.datetime_range(datetime(2020, 1, 1), datetime(2020, 1, 4), "1d", eager=True) >>> date shape: (4,) Series: 'datetime' [datetime[μs]] @@ -1392,9 +1364,7 @@ def offset_by(self, by: str | Expr) -> Series: Examples -------- >>> from datetime import datetime - >>> dates = pl.datetime_range( - ... datetime(2000, 1, 1), datetime(2005, 1, 1), "1y", eager=True - ... ) + >>> dates = pl.datetime_range(datetime(2000, 1, 1), datetime(2005, 1, 1), "1y", eager=True) >>> dates shape: (6,) Series: 'datetime' [datetime[μs]] @@ -1542,9 +1512,7 @@ def truncate( 2001-01-01 22:00:00 ] - >>> s = pl.datetime_range( - ... datetime(2001, 1, 1), datetime(2001, 1, 1, 1), "10m", eager=True - ... ) + >>> s = pl.datetime_range(datetime(2001, 1, 1), datetime(2001, 1, 1, 1), "10m", eager=True) >>> s shape: (7,) Series: 'datetime' [datetime[μs]] diff --git a/py-polars/polars/series/list.py b/py-polars/polars/series/list.py index 7695d473adc5..048f01f24757 100644 --- a/py-polars/polars/series/list.py +++ b/py-polars/polars/series/list.py @@ -662,9 +662,7 @@ def eval(self, expr: Expr, *, parallel: bool = False) -> Series: Examples -------- >>> df = pl.DataFrame({"a": [1, 8, 3], "b": [4, 5, 2]}) - >>> df.with_columns( - ... pl.concat_list(["a", "b"]).list.eval(pl.element().rank()).alias("rank") - ... ) + >>> df.with_columns(pl.concat_list(["a", "b"]).list.eval(pl.element().rank()).alias("rank")) shape: (3, 3) ┌─────┬─────┬────────────┐ │ a ┆ b ┆ rank │ diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index bc68568cabe8..a3103088d597 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4296,9 +4296,7 @@ def set(self, filter: Series, value: int | float | str | bool | None) -> Series: It is better to implement this as follows: - >>> s.to_frame().select( - ... pl.when(pl.col("a") == 2).then(10).otherwise(pl.col("a")) - ... ) + >>> s.to_frame().select(pl.when(pl.col("a") == 2).then(10).otherwise(pl.col("a"))) shape: (3, 1) ┌─────────┐ │ literal │ diff --git a/py-polars/polars/series/string.py b/py-polars/polars/series/string.py index 91937f5767f0..6d205197a0f1 100644 --- a/py-polars/polars/series/string.py +++ b/py-polars/polars/series/string.py @@ -303,9 +303,7 @@ def to_decimal( Examples -------- - >>> s = pl.Series( - ... ["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"] - ... ) + >>> s = pl.Series(["40.12", "3420.13", "120134.19", "3212.98", "12.90", "143.09", "143.9"]) >>> s.str.to_decimal() shape: (7,) Series: '' [decimal[*,2]] @@ -820,11 +818,7 @@ def extract_groups(self, pattern: str) -> Series: For example, we can access the first group via the string `"1"`:: - >>> ( - ... pl.Series(["foo bar baz"]) - ... .str.extract_groups(r"(\w+) (.+) (\w+)") - ... .struct["1"] - ... ) + >>> (pl.Series(["foo bar baz"]).str.extract_groups(r"(\w+) (.+) (\w+)").struct["1"]) shape: (1,) Series: '1' [str] [ diff --git a/py-polars/polars/sql/context.py b/py-polars/polars/sql/context.py index b54a502692dc..ec5b8e3b89ca 100644 --- a/py-polars/polars/sql/context.py +++ b/py-polars/polars/sql/context.py @@ -341,9 +341,7 @@ def register_globals(self, n: int | None = None) -> Self: Query using the register variable/frame names - >>> ctx.execute( - ... "SELECT a, b, c FROM df1 LEFT JOIN df2 USING (a) ORDER BY a DESC" - ... ).collect() + >>> ctx.execute("SELECT a, b, c FROM df1 LEFT JOIN df2 USING (a) ORDER BY a DESC").collect() shape: (3, 3) ┌─────┬──────┬──────┐ │ a ┆ b ┆ c │ @@ -430,7 +428,6 @@ def unregister(self, names: str | Collection[str]) -> Self: >>> # register one frame at construction time, and the other two in-scope >>> with pl.SQLContext(tbl0=df0) as ctx: ... ctx.register_many(tbl1=df1, tbl2=df2).tables() - ... ['tbl0', 'tbl1', 'tbl2'] After scope exit, none of the tables registered in-scope remain: diff --git a/py-polars/polars/string_cache.py b/py-polars/polars/string_cache.py index bb1f4eefc554..00c74fe05a8b 100644 --- a/py-polars/polars/string_cache.py +++ b/py-polars/polars/string_cache.py @@ -37,7 +37,6 @@ class StringCache(contextlib.ContextDecorator): >>> with pl.StringCache(): ... s1 = pl.Series("color", ["red", "green", "red"], dtype=pl.Categorical) ... s2 = pl.Series("color", ["blue", "red", "green"], dtype=pl.Categorical) - ... As both Series are constructed under the same global string cache, they can be concatenated. @@ -62,7 +61,6 @@ class StringCache(contextlib.ContextDecorator): ... s1 = pl.Series("color", ["red", "green", "red"], dtype=pl.Categorical) ... s2 = pl.Series("color", ["blue", "red", "green"], dtype=pl.Categorical) ... return pl.concat([s1, s2]) - ... """ diff --git a/py-polars/polars/testing/parametric/primitives.py b/py-polars/polars/testing/parametric/primitives.py index 074c93cedc44..6c36d46a5b81 100644 --- a/py-polars/polars/testing/parametric/primitives.py +++ b/py-polars/polars/testing/parametric/primitives.py @@ -227,7 +227,6 @@ def columns( ... df = pl.DataFrame(schema=[(c.name, c.dtype) for c in columns(punctuation)]) ... assert len(cols) == len(df.columns) ... assert 0 == len(df.rows()) - ... """ # create/assign named columns diff --git a/py-polars/polars/testing/parametric/strategies.py b/py-polars/polars/testing/parametric/strategies.py index d64b398c99fa..164fffd9edd1 100644 --- a/py-polars/polars/testing/parametric/strategies.py +++ b/py-polars/polars/testing/parametric/strategies.py @@ -374,7 +374,6 @@ def create_list_strategy( ... def uint8_pairs(draw, uints=create_list_strategy(pl.UInt8, size=2)): ... pairs = list(zip(draw(uints), draw(uints))) ... return [sorted(ints) for ints in pairs] - ... >>> uint8_pairs().example() # doctest: +SKIP [(12, 22), (15, 131)] >>> uint8_pairs().example() # doctest: +SKIP