From 54866a9be3942073090f7e07d965ece97dfcb40a Mon Sep 17 00:00:00 2001 From: KOHLMANN-de Date: Fri, 17 May 2024 21:50:14 +0200 Subject: [PATCH] [patch] Circumvent issue with pandas' Grouper --- iotfunctions/aggregate.py | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/iotfunctions/aggregate.py b/iotfunctions/aggregate.py index e01880c7..5baec5fa 100644 --- a/iotfunctions/aggregate.py +++ b/iotfunctions/aggregate.py @@ -174,6 +174,37 @@ def execute(self, df, start_ts=None, end_ts=None, entities=None, offset=None): f"functions but are missing in the data frame: {str(list(missing_col_names))}. " f"Available columns in data frame: {str(list(df.columns))}") + # Circumvent issue with pandas' Grouper: + # Groupers with frequency 'W', 'MS' and 'AS'/'YS' do not take care about hours/minutes/seconds/microseconds + # /nanoseconds in the timestamps. They leave them unaffected. Therefore, we have to generate the group labels + # by ourselves and to replace the Groupers by the group labels. This makes any aggregator which deploys + # group_base on any other dataframe (= data frame with different index) as df fail. + trouble_makers = {'W-SUN', 'MS', 'AS-JAN'} + corrected_group_base = [] + for item in group_base: + if isinstance(item, pd.Grouper): + freq_string = item.freq.freqstr + if freq_string in trouble_makers: + freq_date_offset = pd.tseries.frequencies.to_offset(freq_string) + time_reset = pd.DateOffset(hour=0, minute=0, second=0, microsecond=0, nanosecond=0) + + # Roll back timestamp to, for example, begin of week, set time to 00:00:00.000000000 (and subtract + # offset of timezone) to get timestamp indicating the begin of aggregation interval + if offset is not None: + group_labels = df.index.get_level_values(item.level).to_series().transform(lambda x: freq_date_offset.rollback(x + offset) + time_reset - offset) + else: + group_labels = df.index.get_level_values(item.level).to_series().transform(lambda x: freq_date_offset.rollback(x) + time_reset) + + corrected_group_base.append(group_labels.to_numpy()) + + else: + corrected_group_base.append(item) + + else: + corrected_group_base.append(item) + + group_base = corrected_group_base + # Split data frame into groups groups = df.groupby(group_base)