From 97e6fd0509189af99904ba55f106e470845e6489 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Mon, 26 Oct 2020 17:35:59 -0500 Subject: [PATCH 01/11] FIX-#2254: Added dictionary functions to groupby aggregate tests Signed-off-by: Gregory Shimansky --- modin/pandas/test/test_groupby.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/modin/pandas/test/test_groupby.py b/modin/pandas/test/test_groupby.py index b522e26f673..e75c223346a 100644 --- a/modin/pandas/test/test_groupby.py +++ b/modin/pandas/test/test_groupby.py @@ -149,7 +149,12 @@ def test_mixed_dtypes_groupby(as_index): eval_var(modin_groupby, pandas_groupby) eval_skew(modin_groupby, pandas_groupby) - agg_functions = ["min", "max"] + agg_functions = [ + "min", + "max", + {"col2": "sum"}, + {"col2": "max", "col4": "sum", "col5": "min"}, + ] for func in agg_functions: eval_agg(modin_groupby, pandas_groupby, func) eval_aggregate(modin_groupby, pandas_groupby, func) @@ -479,7 +484,12 @@ def test_single_group_row_groupby(): eval_prod(modin_groupby, pandas_groupby) eval_std(modin_groupby, pandas_groupby) - agg_functions = ["min", "max"] + agg_functions = [ + "min", + "max", + {"col2": "sum"}, + {"col2": "max", "col4": "sum", "col5": "min"}, + ] for func in agg_functions: eval_agg(modin_groupby, pandas_groupby, func) eval_aggregate(modin_groupby, pandas_groupby, func) @@ -595,7 +605,7 @@ def test_large_row_groupby(is_by_category): # eval_prod(modin_groupby, pandas_groupby) causes overflows eval_std(modin_groupby, pandas_groupby) - agg_functions = ["min", "max"] + agg_functions = ["min", "max", {"A": "sum"}, {"A": "max", "B": "sum", "C": "min"}] for func in agg_functions: eval_agg(modin_groupby, pandas_groupby, func) eval_aggregate(modin_groupby, pandas_groupby, func) From 67e95fda336ce1d175e9b5cd47724f0ae3389e12 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Mon, 26 Oct 2020 15:52:44 -0500 Subject: [PATCH 02/11] FIX-#2254: Initial implementation of dictionary functions aggregation Signed-off-by: Gregory Shimansky --- modin/backends/base/query_compiler.py | 10 ---- modin/backends/pandas/query_compiler.py | 70 +++++++++++++++++-------- modin/pandas/groupby.py | 43 +++++---------- 3 files changed, 62 insertions(+), 61 deletions(-) diff --git a/modin/backends/base/query_compiler.py b/modin/backends/base/query_compiler.py index 9a453bc9026..c61e16b42f1 100644 --- a/modin/backends/base/query_compiler.py +++ b/modin/backends/base/query_compiler.py @@ -1429,16 +1429,6 @@ def groupby_agg( drop=drop, ) - def groupby_dict_agg(self, by, func_dict, groupby_args, agg_args, drop=False): - return GroupByDefault.register(pandas.core.groupby.DataFrameGroupBy.aggregate)( - self, - by=by, - func_dict=func_dict, - groupby_args=groupby_args, - agg_args=agg_args, - drop=drop, - ) - # END Manual Partitioning methods def unstack(self, level, fill_value): diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 99f9ed4445d..c6d552c9f85 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2596,29 +2596,57 @@ def groupby_agg( as_index = groupby_kwargs.get("as_index", True) - def groupby_agg_builder(df): - # Set `as_index` to True to track the metadata of the grouping object - # It is used to make sure that between phases we are constructing the - # right index and placing columns in the correct order. - groupby_kwargs["as_index"] = True - - def compute_groupby(df): - grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) + if isinstance(agg_func, dict): + + def groupby_agg_builder(df): + # Set `as_index` to True to track the metadata of the grouping object + # It is used to make sure that between phases we are constructing the + # right index and placing columns in the correct order. + groupby_kwargs["as_index"] = True + + def compute_groupby(df): + grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) + try: + result = grouped_df.agg(agg_func, **agg_args) + # This happens when the partition is filled with non-numeric data and a + # numeric operation is done. We need to build the index here to avoid + # issues with extracting the index. + except (DataError, TypeError): + result = pandas.DataFrame(index=grouped_df.size().index) + return result + try: - result = agg_func(grouped_df, **agg_kwargs) - # This happens when the partition is filled with non-numeric data and a - # numeric operation is done. We need to build the index here to avoid - # issues with extracting the index. - except (DataError, TypeError): - result = pandas.DataFrame(index=grouped_df.size().index) - return result + return compute_groupby(df) + # This will happen with Arrow buffer read-only errors. We don't want to copy + # all the time, so this will try to fast-path the code first. + except (ValueError, KeyError): + return compute_groupby(df.copy()) - try: - return compute_groupby(df) - # This will happen with Arrow buffer read-only errors. We don't want to copy - # all the time, so this will try to fast-path the code first. - except (ValueError, KeyError): - return compute_groupby(df.copy()) + else: + + def groupby_agg_builder(df): + # Set `as_index` to True to track the metadata of the grouping object + # It is used to make sure that between phases we are constructing the + # right index and placing columns in the correct order. + groupby_kwargs["as_index"] = True + + def compute_groupby(df): + grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) + try: + result = agg_func(grouped_df, **agg_kwargs) + # This happens when the partition is filled with non-numeric data and a + # numeric operation is done. We need to build the index here to avoid + # issues with extracting the index. + except (DataError, TypeError): + result = pandas.DataFrame(index=grouped_df.size().index) + return result + + try: + return compute_groupby(df) + # This will happen with Arrow buffer read-only errors. We don't want to copy + # all the time, so this will try to fast-path the code first. + except (ValueError, KeyError): + return compute_groupby(df.copy()) new_modin_frame = self._modin_frame._apply_full_axis( axis, lambda df: groupby_agg_builder(df) diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 5eedd42759c..86bdd6d2885 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -357,6 +357,8 @@ def aggregate(self, func=None, *args, **kwargs): # This is not implemented in pandas, # so we throw a different message raise NotImplementedError("axis other than 0 is not supported") + + relabeling_required = False if isinstance(func, dict) or func is None: def _reconstruct_func(func, **kwargs): @@ -380,50 +382,31 @@ def _reconstruct_func(func, **kwargs): from pandas.core.base import SpecificationError raise SpecificationError("nested renamer is not supported") - if isinstance(self._by, type(self._query_compiler)): - by = list(self._by.columns) - else: - by = self._by - - subset_cols = list(func_dict.keys()) + ( - list(self._by.columns) - if isinstance(self._by, type(self._query_compiler)) - and all(c in self._df.columns for c in self._by.columns) - else [] - ) - result = type(self._df)( - query_compiler=self._df[subset_cols]._query_compiler.groupby_dict_agg( - by=by, - func_dict=func_dict, - groupby_args=self._kwargs, - agg_args=kwargs, - drop=self._drop, - ) - ) - - if relabeling_required: - result = result.iloc[:, order] - result.columns = new_columns - - return result - - if is_list_like(func): + func = func_dict + elif is_list_like(func): return self._default_to_pandas( lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs), *args, **kwargs, ) - if isinstance(func, str): + elif isinstance(func, str): agg_func = getattr(self, func, None) if callable(agg_func): return agg_func(*args, **kwargs) - return self._apply_agg_function( + + result = self._apply_agg_function( lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs), drop=self._as_index, *args, **kwargs, ) + if relabeling_required: + result = result.iloc[:, order] + result.columns = new_columns + + return result + agg = aggregate def last(self, **kwargs): From 90efde00aa0813465d82da37c8518fee94149e12 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Tue, 3 Nov 2020 14:27:57 -0600 Subject: [PATCH 03/11] FIX-#2254: Remove lambda wrapper to allow dictionary to go to backend Signed-off-by: Gregory Shimansky --- modin/backends/pandas/query_compiler.py | 26 ++++++++++++++++++------- modin/pandas/groupby.py | 4 ++-- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index c6d552c9f85..c1d52ecaa1a 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2573,14 +2573,23 @@ def groupby_agg( groupby_kwargs, drop=False, ): - agg_func = wrap_udf_function(agg_func) + if callable(agg_func): + agg_func = wrap_udf_function(agg_func) if is_multi_by: + # If function was kept as a dictionary until now, it is now necessary to repeat all steps + # that were skipped previously, that is, make it a lambda. This is necessary + # because default to pandas is unable to operate with dictionary aggregation function argument, + # it accepts only callable functions. + if isinstance(agg_func, dict): + callable_func = lambda df, *args, **kwargs: df.aggregate(agg_func, *agg_args, **agg_kwargs) + else: + callable_func = agg_func return super().groupby_agg( by=by, is_multi_by=is_multi_by, axis=axis, - agg_func=agg_func, + agg_func=callable_func, agg_args=agg_args, agg_kwargs=agg_kwargs, groupby_kwargs=groupby_kwargs, @@ -2607,7 +2616,7 @@ def groupby_agg_builder(df): def compute_groupby(df): grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) try: - result = grouped_df.agg(agg_func, **agg_args) + result = grouped_df.agg(agg_func) # This happens when the partition is filled with non-numeric data and a # numeric operation is done. We need to build the index here to avoid # issues with extracting the index. @@ -2659,10 +2668,13 @@ def compute_groupby(df): # determening type of raised exception by applying `aggfunc` # to empty DataFrame try: - agg_func( - pandas.DataFrame(index=[1], columns=[1]).groupby(level=0), - **agg_kwargs, - ) + if isinstance(agg_func, dict): + pandas.DataFrame(index=[1], columns=[1]).agg(agg_func) + else: + agg_func( + pandas.DataFrame(index=[1], columns=[1]).groupby(level=0), + **agg_kwargs, + ) except Exception as e: raise type(e)("No numeric types to aggregate.") diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 86bdd6d2885..74db37de9ca 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -395,7 +395,7 @@ def _reconstruct_func(func, **kwargs): return agg_func(*args, **kwargs) result = self._apply_agg_function( - lambda df, *args, **kwargs: df.aggregate(func, *args, **kwargs), + func, drop=self._as_index, *args, **kwargs, @@ -871,7 +871,7 @@ def _apply_agg_function(self, f, drop=True, *args, **kwargs): ------- A new combined DataFrame with the result of all groups. """ - assert callable(f), "'{0}' object is not callable".format(type(f)) + assert callable(f) or isinstance(f, dict), "'{0}' object is not callable and not a dict".format(type(f)) # For aggregations, pandas behavior does this for the result. # For other operations it does not, so we wait until there is an aggregation to From f0c6300dfa0eadfd7e5abbea0da894b61934bf2e Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Wed, 4 Nov 2020 17:47:53 -0600 Subject: [PATCH 04/11] FIX-#2254: Fixed AttributeError not being thrown from getattr Signed-off-by: Gregory Shimansky --- modin/pandas/groupby.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 74db37de9ca..ee1c3f5a300 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -390,7 +390,9 @@ def _reconstruct_func(func, **kwargs): **kwargs, ) elif isinstance(func, str): - agg_func = getattr(self, func, None) + # Using "getattr" here masks possible AttributeError which we throw + # in __getattr__, so we should call __getattr__ directly instead. + agg_func = self.__getattr__(func) if callable(agg_func): return agg_func(*args, **kwargs) From 12debefaff6d632e2afd68d5ca84981215cf6bd6 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Wed, 4 Nov 2020 18:02:23 -0600 Subject: [PATCH 05/11] FIX-#2254: Lint fixes Signed-off-by: Gregory Shimansky --- modin/backends/pandas/query_compiler.py | 6 +++++- modin/pandas/groupby.py | 4 +++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index c1d52ecaa1a..f855f1b358b 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2582,7 +2582,11 @@ def groupby_agg( # because default to pandas is unable to operate with dictionary aggregation function argument, # it accepts only callable functions. if isinstance(agg_func, dict): - callable_func = lambda df, *args, **kwargs: df.aggregate(agg_func, *agg_args, **agg_kwargs) + callable_func = wrap_udf_function( + lambda df, *args, **kwargs: df.aggregate( + agg_func, *agg_args, **agg_kwargs + ) + ) else: callable_func = agg_func return super().groupby_agg( diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index ee1c3f5a300..20003a4f0f7 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -873,7 +873,9 @@ def _apply_agg_function(self, f, drop=True, *args, **kwargs): ------- A new combined DataFrame with the result of all groups. """ - assert callable(f) or isinstance(f, dict), "'{0}' object is not callable and not a dict".format(type(f)) + assert callable(f) or isinstance( + f, dict + ), "'{0}' object is not callable and not a dict".format(type(f)) # For aggregations, pandas behavior does this for the result. # For other operations it does not, so we wait until there is an aggregation to From 18249a7d70c9ab2bd5108bf4f1c0d1ac7c979226 Mon Sep 17 00:00:00 2001 From: ienkovich Date: Thu, 5 Nov 2020 06:41:34 -0600 Subject: [PATCH 06/11] FEAT-#2363: fix index name setter in OmniSci backend Signed-off-by: ienkovich --- modin/experimental/engines/omnisci_on_ray/frame/data.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modin/experimental/engines/omnisci_on_ray/frame/data.py b/modin/experimental/engines/omnisci_on_ray/frame/data.py index 88bd6c3cb24..42d59f70e11 100644 --- a/modin/experimental/engines/omnisci_on_ray/frame/data.py +++ b/modin/experimental/engines/omnisci_on_ray/frame/data.py @@ -1251,11 +1251,11 @@ def set_index_name(self, name): return self names = self._mangle_index_names([name]) + exprs = OrderedDict() if self._index_cols is None: - exprs = OrderedDict() - exprs[name] = self.ref("__rowid__") + exprs[names[0]] = self.ref("__rowid__") else: - exprs = self._index_exprs() + exprs[names[0]] = self.ref(self._index_cols[0]) for col in self.columns: exprs[col] = self.ref(col) From c1dc213bf73074d36f07982c01983e9f42519fad Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Thu, 5 Nov 2020 13:10:36 -0600 Subject: [PATCH 07/11] FIX-#2254: Removed obsolete groupby_dict_agg API function Signed-off-by: Gregory Shimansky --- .../backends/omnisci/query_compiler.py | 27 ------------------- 1 file changed, 27 deletions(-) diff --git a/modin/experimental/backends/omnisci/query_compiler.py b/modin/experimental/backends/omnisci/query_compiler.py index eadee462d9f..35ce16e9917 100644 --- a/modin/experimental/backends/omnisci/query_compiler.py +++ b/modin/experimental/backends/omnisci/query_compiler.py @@ -279,33 +279,6 @@ def groupby_agg( ) return self.__constructor__(new_frame) - def groupby_dict_agg(self, by, func_dict, groupby_args, agg_args, drop=False): - """Apply aggregation functions to a grouped dataframe per-column. - - Parameters - ---------- - by : DFAlgQueryCompiler - The column to group by - func_dict : dict of str, callable/string - The dictionary mapping of column to function - groupby_args : dict - The dictionary of keyword arguments for the group by. - agg_args : dict - The dictionary of keyword arguments for the aggregation functions - drop : bool - Whether or not to drop the column from the data. - - Returns - ------- - DFAlgQueryCompiler - The result of the per-column aggregations on the grouped dataframe. - """ - # TODO: handle `drop` arg - new_frame = self._modin_frame.groupby_agg( - by, 0, func_dict, groupby_args, **agg_args - ) - return self.__constructor__(new_frame) - def count(self, **kwargs): return self._agg("count", **kwargs) From 9eea77c0da288c63753c55ad9c21cc0805a047d9 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Fri, 6 Nov 2020 12:14:13 -0600 Subject: [PATCH 08/11] FIX-#2254: Fixed dict aggregate for base backend Signed-off-by: Gregory Shimansky --- .../functions/default_methods/groupby_default.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modin/data_management/functions/default_methods/groupby_default.py b/modin/data_management/functions/default_methods/groupby_default.py index b6ae497c75f..0cbfd737479 100644 --- a/modin/data_management/functions/default_methods/groupby_default.py +++ b/modin/data_management/functions/default_methods/groupby_default.py @@ -80,7 +80,10 @@ def fn( grp = df.groupby(by, axis=axis, **groupby_args) agg_func = cls.get_func(grp, key, **kwargs) - result = agg_func(grp, **agg_args) + if isinstance(agg_func, dict): + result = grp.agg(agg_func, **agg_args) + else: + result = agg_func(grp, **agg_args) if not is_multi_by: if as_index: From a99ca6c638ee365c7b8df4013ee31752fef0f657 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Thu, 12 Nov 2020 14:21:23 -0600 Subject: [PATCH 09/11] FIX-#2254: Address reformatting comments Signed-off-by: Gregory Shimansky --- modin/backends/pandas/query_compiler.py | 87 +++++++------------ .../default_methods/groupby_default.py | 9 +- 2 files changed, 36 insertions(+), 60 deletions(-) diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index f855f1b358b..62e5528fc4f 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2609,57 +2609,33 @@ def groupby_agg( as_index = groupby_kwargs.get("as_index", True) - if isinstance(agg_func, dict): - - def groupby_agg_builder(df): - # Set `as_index` to True to track the metadata of the grouping object - # It is used to make sure that between phases we are constructing the - # right index and placing columns in the correct order. - groupby_kwargs["as_index"] = True - - def compute_groupby(df): - grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) - try: - result = grouped_df.agg(agg_func) - # This happens when the partition is filled with non-numeric data and a - # numeric operation is done. We need to build the index here to avoid - # issues with extracting the index. - except (DataError, TypeError): - result = pandas.DataFrame(index=grouped_df.size().index) - return result - + def groupby_agg_builder(df): + # Set `as_index` to True to track the metadata of the grouping object + # It is used to make sure that between phases we are constructing the + # right index and placing columns in the correct order. + groupby_kwargs["as_index"] = True + + def compute_groupby(df): + grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) try: - return compute_groupby(df) - # This will happen with Arrow buffer read-only errors. We don't want to copy - # all the time, so this will try to fast-path the code first. - except (ValueError, KeyError): - return compute_groupby(df.copy()) - - else: - - def groupby_agg_builder(df): - # Set `as_index` to True to track the metadata of the grouping object - # It is used to make sure that between phases we are constructing the - # right index and placing columns in the correct order. - groupby_kwargs["as_index"] = True - - def compute_groupby(df): - grouped_df = df.groupby(by=by, axis=axis, **groupby_kwargs) - try: - result = agg_func(grouped_df, **agg_kwargs) - # This happens when the partition is filled with non-numeric data and a - # numeric operation is done. We need to build the index here to avoid - # issues with extracting the index. - except (DataError, TypeError): - result = pandas.DataFrame(index=grouped_df.size().index) - return result + result = ( + grouped_df.agg(agg_func) + if isinstance(agg_func, dict) + else agg_func(grouped_df, **agg_kwargs) + ) + # This happens when the partition is filled with non-numeric data and a + # numeric operation is done. We need to build the index here to avoid + # issues with extracting the index. + except (DataError, TypeError): + result = pandas.DataFrame(index=grouped_df.size().index) + return result - try: - return compute_groupby(df) - # This will happen with Arrow buffer read-only errors. We don't want to copy - # all the time, so this will try to fast-path the code first. - except (ValueError, KeyError): - return compute_groupby(df.copy()) + try: + return compute_groupby(df) + # This will happen with Arrow buffer read-only errors. We don't want to copy + # all the time, so this will try to fast-path the code first. + except (ValueError, KeyError): + return compute_groupby(df.copy()) new_modin_frame = self._modin_frame._apply_full_axis( axis, lambda df: groupby_agg_builder(df) @@ -2672,13 +2648,12 @@ def compute_groupby(df): # determening type of raised exception by applying `aggfunc` # to empty DataFrame try: - if isinstance(agg_func, dict): - pandas.DataFrame(index=[1], columns=[1]).agg(agg_func) - else: - agg_func( - pandas.DataFrame(index=[1], columns=[1]).groupby(level=0), - **agg_kwargs, - ) + pandas.DataFrame(index=[1], columns=[1]).agg(agg_func) if isinstance( + agg_func, dict + ) else agg_func( + pandas.DataFrame(index=[1], columns=[1]).groupby(level=0), + **agg_kwargs, + ) except Exception as e: raise type(e)("No numeric types to aggregate.") diff --git a/modin/data_management/functions/default_methods/groupby_default.py b/modin/data_management/functions/default_methods/groupby_default.py index 0cbfd737479..e6cd40675e7 100644 --- a/modin/data_management/functions/default_methods/groupby_default.py +++ b/modin/data_management/functions/default_methods/groupby_default.py @@ -80,10 +80,11 @@ def fn( grp = df.groupby(by, axis=axis, **groupby_args) agg_func = cls.get_func(grp, key, **kwargs) - if isinstance(agg_func, dict): - result = grp.agg(agg_func, **agg_args) - else: - result = agg_func(grp, **agg_args) + result = ( + grp.agg(agg_func, **agg_args) + if isinstance(agg_func, dict) + else agg_func(grp, **agg_args) + ) if not is_multi_by: if as_index: From 412c1c91cd4141e4b23295e3ff25bb86d596faad Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Fri, 13 Nov 2020 10:46:47 -0600 Subject: [PATCH 10/11] FIX-#2254: Remove whitespace Signed-off-by: Gregory Shimansky --- modin/pandas/groupby.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modin/pandas/groupby.py b/modin/pandas/groupby.py index 20003a4f0f7..3329a0412c1 100644 --- a/modin/pandas/groupby.py +++ b/modin/pandas/groupby.py @@ -406,7 +406,6 @@ def _reconstruct_func(func, **kwargs): if relabeling_required: result = result.iloc[:, order] result.columns = new_columns - return result agg = aggregate From 6917382e82dd17339d574a4defe2a1559958dd72 Mon Sep 17 00:00:00 2001 From: Gregory Shimansky Date: Fri, 13 Nov 2020 12:46:18 -0600 Subject: [PATCH 11/11] FIX-#2254: Removed redundant argument conversion because it is already done inside of base backend. Signed-off-by: Gregory Shimansky --- modin/backends/pandas/query_compiler.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/modin/backends/pandas/query_compiler.py b/modin/backends/pandas/query_compiler.py index 62e5528fc4f..2567f33e026 100644 --- a/modin/backends/pandas/query_compiler.py +++ b/modin/backends/pandas/query_compiler.py @@ -2577,23 +2577,11 @@ def groupby_agg( agg_func = wrap_udf_function(agg_func) if is_multi_by: - # If function was kept as a dictionary until now, it is now necessary to repeat all steps - # that were skipped previously, that is, make it a lambda. This is necessary - # because default to pandas is unable to operate with dictionary aggregation function argument, - # it accepts only callable functions. - if isinstance(agg_func, dict): - callable_func = wrap_udf_function( - lambda df, *args, **kwargs: df.aggregate( - agg_func, *agg_args, **agg_kwargs - ) - ) - else: - callable_func = agg_func return super().groupby_agg( by=by, is_multi_by=is_multi_by, axis=axis, - agg_func=callable_func, + agg_func=agg_func, agg_args=agg_args, agg_kwargs=agg_kwargs, groupby_kwargs=groupby_kwargs,