From 528ced34f2040ecdff5b3c97eeda4e1ced21978f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <33491632+MarcoGorelli@users.noreply.github.com> Date: Sat, 15 Jul 2023 13:51:59 +0100 Subject: [PATCH] docs(python): add big warnings about using apply --- py-polars/polars/dataframe/frame.py | 4 ++++ py-polars/polars/dataframe/groupby.py | 4 ++++ py-polars/polars/expr/expr.py | 4 ++++ py-polars/polars/functions/lazy.py | 4 ++++ py-polars/polars/lazyframe/groupby.py | 4 ++++ py-polars/polars/series/series.py | 4 ++++ 6 files changed, 24 insertions(+) diff --git a/py-polars/polars/dataframe/frame.py b/py-polars/polars/dataframe/frame.py index dc648c8a2883..e0c46a2c07f9 100644 --- a/py-polars/polars/dataframe/frame.py +++ b/py-polars/polars/dataframe/frame.py @@ -5682,6 +5682,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over the rows of the DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + The UDF will receive each row as a tuple of values: ``udf(row)``. Implementing logic using a Python function is almost always _significantly_ diff --git a/py-polars/polars/dataframe/groupby.py b/py-polars/polars/dataframe/groupby.py index cad4e0a15b60..573938c59a08 100644 --- a/py-polars/polars/dataframe/groupby.py +++ b/py-polars/polars/dataframe/groupby.py @@ -252,6 +252,10 @@ def apply(self, function: Callable[[DataFrame], DataFrame]) -> DataFrame: """ Apply a custom/user-defined function (UDF) over the groups as a sub-DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Implementing logic using a Python function is almost always _significantly_ slower and more memory intensive than implementing the same logic using the native expression API because: diff --git a/py-polars/polars/expr/expr.py b/py-polars/polars/expr/expr.py index 768bed40458d..cc9998946a76 100644 --- a/py-polars/polars/expr/expr.py +++ b/py-polars/polars/expr/expr.py @@ -3613,6 +3613,10 @@ def apply( """ Apply a custom/user-defined function (UDF) in a GroupBy or Projection context. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Depending on the context it has the following behavior: * Selection diff --git a/py-polars/polars/functions/lazy.py b/py-polars/polars/functions/lazy.py index 97a2a9aefa7f..3503a3b0b791 100644 --- a/py-polars/polars/functions/lazy.py +++ b/py-polars/polars/functions/lazy.py @@ -1164,6 +1164,10 @@ def apply( """ Apply a custom/user-defined function (UDF) in a GroupBy context. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Depending on the context it has the following behavior: * Select diff --git a/py-polars/polars/lazyframe/groupby.py b/py-polars/polars/lazyframe/groupby.py index fef864b8be3d..a75726cde9ec 100644 --- a/py-polars/polars/lazyframe/groupby.py +++ b/py-polars/polars/lazyframe/groupby.py @@ -163,6 +163,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over the groups as a new DataFrame. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + Using this is considered an anti-pattern. This will be very slow because: - it forces the engine to materialize the whole `DataFrames` for the groups. diff --git a/py-polars/polars/series/series.py b/py-polars/polars/series/series.py index b36da7732e86..e39ed3f01cf8 100644 --- a/py-polars/polars/series/series.py +++ b/py-polars/polars/series/series.py @@ -4364,6 +4364,10 @@ def apply( """ Apply a custom/user-defined function (UDF) over elements in this Series. + .. warning:: + This method is much slower than the native expressions API. + Only use it if you cannot implement your logic otherwise. + If the function returns a different datatype, the return_dtype arg should be set, otherwise the method will fail.