DOC: fix RT03, ES01 for pandas.core.groupby.DataFrameGroupBy.agg and …

…pandas.core.groupby.DataFrameGroupBy.aggregate (#59869) * DOC: add double backticks for sphinx compatibility Co-authored-by: mroeschke <mroeschke@users.noreply.github.com> * DOC: remove _agg_template_frame Co-authored-by: mroeschke <mroeschke@users.noreply.github.com> Co-authored-by: rhshadrach <rhshadrach@users.noreply.github.com> * DOC: fix RT03, ES01 for pandas.core.groupby.DataFrameGroupBy.aggregate --------- Co-authored-by: mroeschke <mroeschke@users.noreply.github.com> Co-authored-by: rhshadrach <rhshadrach@users.noreply.github.com>
pandas-dev · Sep 28, 2024 · cf12e67 · cf12e67
1 parent 96de1f1
commit cf12e67
Show file tree

Hide file tree

Showing 4 changed files with 174 additions and 83 deletions.
diff --git a/ci/code_checks.sh b/ci/code_checks.sh
@@ -127,8 +127,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
         -i "pandas.arrays.SparseArray PR07,SA01" \
         -i "pandas.arrays.TimedeltaArray PR07,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.__iter__ RT03,SA01" \
-        -i "pandas.core.groupby.DataFrameGroupBy.agg RT03" \
-        -i "pandas.core.groupby.DataFrameGroupBy.aggregate RT03" \
         -i "pandas.core.groupby.DataFrameGroupBy.boxplot PR07,RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.get_group RT03,SA01" \
         -i "pandas.core.groupby.DataFrameGroupBy.groups SA01" \

diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
@@ -67,7 +67,6 @@
 from pandas.core.groupby.groupby import (
     GroupBy,
     GroupByPlot,
-    _agg_template_frame,
     _agg_template_series,
     _transform_template,
 )
@@ -1515,8 +1514,181 @@ class DataFrameGroupBy(GroupBy[DataFrame]):
     """
     )
 
-    @doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame")
     def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs):
+        """
+        Aggregate using one or more operations.
+
+        The ``aggregate`` function allows the application of one or more aggregation
+        operations on groups of data within a DataFrameGroupBy object. It supports
+        various aggregation methods, including user-defined functions and predefined
+        functions such as 'sum', 'mean', etc.
+
+        Parameters
+        ----------
+        func : function, str, list, dict or None
+            Function to use for aggregating the data. If a function, must either
+            work when passed a DataFrame or when passed to DataFrame.apply.
+
+            Accepted combinations are:
+
+            - function
+            - string function name
+            - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
+            - dict of index labels -> functions, function names or list of such.
+            - None, in which case ``**kwargs`` are used with Named Aggregation. Here the
+              output has one column for each element in ``**kwargs``. The name of the
+              column is keyword, whereas the value determines the aggregation used to
+              compute the values in the column.
+
+              Can also accept a Numba JIT function with
+              ``engine='numba'`` specified. Only passing a single function is supported
+              with this engine.
+
+              If the ``'numba'`` engine is chosen, the function must be
+              a user defined function with ``values`` and ``index`` as the
+              first and second arguments respectively in the function signature.
+              Each group's index will be passed to the user defined function
+              and optionally available for use.
+
+        *args
+            Positional arguments to pass to func.
+        engine : str, default None
+            * ``'cython'`` : Runs the function through C-extensions from cython.
+            * ``'numba'`` : Runs the function through JIT compiled code from numba.
+            * ``None`` : Defaults to ``'cython'`` or globally setting
+                ``compute.use_numba``
+
+        engine_kwargs : dict, default None
+            * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
+            * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
+              and ``parallel`` dictionary keys. The values must either be ``True`` or
+              ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
+              ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be
+              applied to the function
+
+        **kwargs
+            * If ``func`` is None, ``**kwargs`` are used to define the output names and
+              aggregations via Named Aggregation. See ``func`` entry.
+            * Otherwise, keyword arguments to be passed into func.
+
+        Returns
+        -------
+        DataFrame
+            Aggregated DataFrame based on the grouping and the applied aggregation
+            functions.
+
+        See Also
+        --------
+        DataFrame.groupby.apply : Apply function func group-wise
+            and combine the results together.
+        DataFrame.groupby.transform : Transforms the Series on each group
+            based on the given function.
+        DataFrame.aggregate : Aggregate using one or more operations.
+
+        Notes
+        -----
+        When using ``engine='numba'``, there will be no "fall back" behavior internally.
+        The group data and group index will be passed as numpy arrays to the JITed
+        user defined function, and no alternative execution attempts will be tried.
+
+        Functions that mutate the passed object can produce unexpected
+        behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
+        for more details.
+
+        .. versionchanged:: 1.3.0
+
+            The resulting dtype will reflect the return value of the passed ``func``,
+            see the examples below.
+
+        Examples
+        --------
+        >>> data = {
+        ...     "A": [1, 1, 2, 2],
+        ...     "B": [1, 2, 3, 4],
+        ...     "C": [0.362838, 0.227877, 1.267767, -0.562860],
+        ... }
+        >>> df = pd.DataFrame(data)
+        >>> df
+           A  B         C
+        0  1  1  0.362838
+        1  1  2  0.227877
+        2  2  3  1.267767
+        3  2  4 -0.562860
+
+        The aggregation is for each column.
+
+        >>> df.groupby("A").agg("min")
+           B         C
+        A
+        1  1  0.227877
+        2  3 -0.562860
+
+        Multiple aggregations
+
+        >>> df.groupby("A").agg(["min", "max"])
+            B             C
+          min max       min       max
+        A
+        1   1   2  0.227877  0.362838
+        2   3   4 -0.562860  1.267767
+
+        Select a column for aggregation
+
+        >>> df.groupby("A").B.agg(["min", "max"])
+           min  max
+        A
+        1    1    2
+        2    3    4
+
+        User-defined function for aggregation
+
+        >>> df.groupby("A").agg(lambda x: sum(x) + 2)
+            B          C
+        A
+        1       5       2.590715
+        2       9       2.704907
+
+        Different aggregations per column
+
+        >>> df.groupby("A").agg({"B": ["min", "max"], "C": "sum"})
+            B             C
+          min max       sum
+        A
+        1   1   2  0.590715
+        2   3   4  0.704907
+
+        To control the output names with different aggregations per column,
+        pandas supports "named aggregation"
+
+        >>> df.groupby("A").agg(
+        ...     b_min=pd.NamedAgg(column="B", aggfunc="min"),
+        ...     c_sum=pd.NamedAgg(column="C", aggfunc="sum"),
+        ... )
+           b_min     c_sum
+        A
+        1      1  0.590715
+        2      3  0.704907
+
+        - The keywords are the *output* column names
+        - The values are tuples whose first element is the column to select
+          and the second element is the aggregation to apply to that column.
+          Pandas provides the ``pandas.NamedAgg`` namedtuple with the fields
+          ``['column', 'aggfunc']`` to make it clearer what the arguments are.
+          As usual, the aggregation can be a callable or a string alias.
+
+        See :ref:`groupby.aggregate.named` for more.
+
+        .. versionchanged:: 1.3.0
+
+            The resulting dtype will reflect the return value of the aggregating
+            function.
+
+        >>> df.groupby("A")[["B"]].agg(lambda x: x.astype(float).min())
+              B
+        A
+        1   1.0
+        2   3.0
+        """
         relabeling, func, columns, order = reconstruct_func(func, **kwargs)
         func = maybe_mangle_lambdas(func)
 

diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py
@@ -445,84 +445,6 @@ class providing the base-class of operations.
     see the examples below.
 {examples}"""
 
-_agg_template_frame = """
-Aggregate using one or more operations.
-
-Parameters
-----------
-func : function, str, list, dict or None
-    Function to use for aggregating the data. If a function, must either
-    work when passed a {klass} or when passed to {klass}.apply.
-
-    Accepted combinations are:
-
-    - function
-    - string function name
-    - list of functions and/or function names, e.g. ``[np.sum, 'mean']``
-    - dict of index labels -> functions, function names or list of such.
-    - None, in which case ``**kwargs`` are used with Named Aggregation. Here the
-      output has one column for each element in ``**kwargs``. The name of the
-      column is keyword, whereas the value determines the aggregation used to compute
-      the values in the column.
-
-      Can also accept a Numba JIT function with
-      ``engine='numba'`` specified. Only passing a single function is supported
-      with this engine.
-
-      If the ``'numba'`` engine is chosen, the function must be
-      a user defined function with ``values`` and ``index`` as the
-      first and second arguments respectively in the function signature.
-      Each group's index will be passed to the user defined function
-      and optionally available for use.
-
-*args
-    Positional arguments to pass to func.
-engine : str, default None
-    * ``'cython'`` : Runs the function through C-extensions from cython.
-    * ``'numba'`` : Runs the function through JIT compiled code from numba.
-    * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba``
-
-engine_kwargs : dict, default None
-    * For ``'cython'`` engine, there are no accepted ``engine_kwargs``
-    * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil``
-      and ``parallel`` dictionary keys. The values must either be ``True`` or
-      ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is
-      ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be
-      applied to the function
-
-**kwargs
-    * If ``func`` is None, ``**kwargs`` are used to define the output names and
-      aggregations via Named Aggregation. See ``func`` entry.
-    * Otherwise, keyword arguments to be passed into func.
-
-Returns
--------
-{klass}
-
-See Also
---------
-{klass}.groupby.apply : Apply function func group-wise
-    and combine the results together.
-{klass}.groupby.transform : Transforms the Series on each group
-    based on the given function.
-{klass}.aggregate : Aggregate using one or more operations.
-
-Notes
------
-When using ``engine='numba'``, there will be no "fall back" behavior internally.
-The group data and group index will be passed as numpy arrays to the JITed
-user defined function, and no alternative execution attempts will be tried.
-
-Functions that mutate the passed object can produce unexpected
-behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
-for more details.
-
-.. versionchanged:: 1.3.0
-
-    The resulting dtype will reflect the return value of the passed ``func``,
-    see the examples below.
-{examples}"""
-
 
 @final
 class GroupByPlot(PandasObject):

diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py
@@ -30,7 +30,6 @@
     "_new_Index",
     "_new_PeriodIndex",
     "_agg_template_series",
-    "_agg_template_frame",
     "_pipe_template",
     "_apply_groupings_depr",
     "__main__",