From 8b4fc564c9b7ca4ec47112235cb97d6e4df8938d Mon Sep 17 00:00:00 2001
From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
Date: Wed, 20 Sep 2023 13:45:54 -0400
Subject: [PATCH] Address Pandas 2.1.0 deprecations

Merged against latest os-climate/main.

Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
---
 examples/ITR_UI.py             |  4 ++--
 requirements.txt               |  2 +-
 src/ITR/data/base_providers.py | 31 +++++++++----------------------
 src/ITR/data/template.py       | 15 +++++----------
 4 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/examples/ITR_UI.py b/examples/ITR_UI.py
index 918aaa05..89eb853d 100644
--- a/examples/ITR_UI.py
+++ b/examples/ITR_UI.py
@@ -1888,7 +1888,7 @@ def update_graph(
     changed_id = [p["prop_id"] for p in dash.callback_context.triggered][
         0
     ]  # to catch which widgets were pressed
-    amended_portfolio = pd.read_json(portfolio_json, orient="split")
+    amended_portfolio = pd.read_json(io.StringIO(initial_value=portfolio_json), orient="split")
     # Why does this get lost in translation?
     amended_portfolio.index.name = "company_id"
     amended_portfolio = amended_portfolio.assign(
@@ -1960,7 +1960,7 @@ def update_graph(
     scope_mask = amended_portfolio.scope == EScope[scope]
     filt_df = amended_portfolio[
         temp_score_mask & sec_mask & reg_mask & scope_mask
-    ]  # filtering
+    ].copy()  # filtering; copy makes it safe to modify column contents without getting warnings
     if len(filt_df) == 0:  # if after filtering the dataframe is empty
         # breakpoint()
         raise PreventUpdate
diff --git a/requirements.txt b/requirements.txt
index 109c1b43..277bfaa2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ openpyxl==3.0.10
 openscm-units==0.5.2
 orca==1.8
 osc-ingest-tools>=0.4.3
-pandas>=2.0.3
+pandas>=2.1.0
 Pint>=0.22
 Pint-Pandas>=0.5
 psutil==5.9.5
diff --git a/src/ITR/data/base_providers.py b/src/ITR/data/base_providers.py
index 57d1260d..8bb95097 100644
--- a/src/ITR/data/base_providers.py
+++ b/src/ITR/data/base_providers.py
@@ -409,7 +409,6 @@ def _get_intensity_benchmarks(
         sec_reg_scopes = company_sector_region_scope[["sector", "region", "scope"]]
         if scope_to_calc is not None:
             sec_reg_scopes = sec_reg_scopes[sec_reg_scopes.scope.eq(scope_to_calc)]
-        sec_reg_scopes[~sec_reg_scopes.index.duplicated()]
         sec_reg_scopes_mi = pd.MultiIndex.from_frame(sec_reg_scopes).unique()
         bm_proj_t = self._EI_df_t.loc[
             range(
@@ -429,20 +428,12 @@ def _get_intensity_benchmarks(
         # This piece of work essentially does a column-based join (to avoid extra transpositions)
         result = pd.concat(
             [
-                pd.Series()
-                if x is None
-                else bm_proj_t[tuple(x[1])].rename((x[0], x[1][-1]))
-                for y in sec_reg_scopes.iterrows()
-                # In the happy path, we can use sector/region/scope index as-is
-                # In the less-happy path, we have to construct sector/'Global'/scope
-                # In the least happy path, we have to ditch the row because our benchmark does not cover it
-                for x in [
-                    y
-                    if tuple(y[1]) in bm_proj_t
-                    else (y[0], (y[1][0], "Global", y[1][2]))
-                    if (y[1][0], "Global", y[1][2]) in bm_proj_t
-                    else None
-                ]
+                bm_proj_t[tuple(ser)].rename((idx, ser.iloc[2]))
+                if tuple(ser) in bm_proj_t
+                else bm_proj_t[ser_global].rename((idx, ser.iloc[2]))
+                if (ser_global := (ser.iloc[0], "Global", ser.iloc[2])) in bm_proj_t
+                else pd.Series()
+                for idx, ser in sec_reg_scopes.iterrows()
             ],
             axis=1,
         ).dropna(axis=1, how="all")
@@ -1130,9 +1121,7 @@ def project_ei_trajectories(
                 )
             )
         else:
-            backfilled_t = historic_ei_t.apply(
-                lambda col: col.fillna(method="bfill")
-            )
+            backfilled_t = historic_ei_t.bfill(axis=0)
         # FIXME: this hack causes backfilling only on dates on or after the first year of the benchmark, which keeps it from disrupting current test cases
         # while also working on real-world use cases.  But we need to formalize this decision.
         backfilled_t = backfilled_t.reset_index()
@@ -1526,14 +1515,12 @@ def _get_trends(self, intensities_t: pd.DataFrame):
         intensities_t = intensities_t.apply(
             lambda col: col
             if col.dtype == np.float64
+            # Float64 NA needs to be converted to np.nan before we can apply nominal_values
             else ITR.nominal_values(col.fillna(np.nan)).astype(np.float64)
         )
-        # FIXME: Pandas 2.1
         # Treat NaN ratios as "unchanged year on year"
         # FIXME Could we ever have UFloat NaNs here?  np.nan is valid UFloat.
-        ratios_t: pd.DataFrame = intensities_t.rolling(
-            window=2, axis="index", closed="right"
-        ).apply(func=self._year_on_year_ratio, raw=True)
+        ratios_t: pd.DataFrame = intensities_t.rolling(window=2, closed="right").apply(func=self._year_on_year_ratio, raw=True)
         ratios_t = ratios_t.apply(
             lambda col: col.fillna(0) if all(col.map(lambda x: ITR.isna(x))) else col
         )
diff --git a/src/ITR/data/template.py b/src/ITR/data/template.py
index b029bd2f..999580d8 100644
--- a/src/ITR/data/template.py
+++ b/src/ITR/data/template.py
@@ -569,18 +569,15 @@ def _init_from_template_company_data(self, excel_path: str):
         ].ffill()

         # NA in exposure is how we drop rows we want to ignore
-        df = df[df.exposure.notna()]
-
-        # TODO: Fix market_cap column naming inconsistency
-        df.rename(
+        df = df[df.exposure.notna()].rename(
             columns={
                 "revenue": "company_revenue",
                 "market_cap": "company_market_cap",
                 "ev": "company_enterprise_value",
                 "evic": "company_ev_plus_cash",
                 "assets": "company_total_assets",
-            },
-            inplace=True,
+                # TODO: Fix market_cap column naming inconsistency
+            }
         )

         df.loc[df.region.isnull(), "region"] = df.country.map(ITR_country_to_region)
@@ -1388,7 +1385,7 @@ def _fixup_name(x):
         ].droplevel("metric")
         if prod_base_year.isna().any():
             logger.warning(
-                f"The following companies lack base year production info (will be ignored:\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
+                f"The following companies lack base year production info (will be ignored):\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
             )
         prod_base_year = prod_base_year[prod_base_year.notna()]
         prod_metrics = prod_base_year.map(lambda x: f"{x.u:~P}")
@@ -1505,9 +1502,7 @@ def fill_blank_or_missing_scopes(
         )
         new_ab.insert(0, "scope", scope_ab)
         new_ab.set_index("scope", append=True, inplace=True)
-        df_ab[df_ab.applymap(lambda x: ITR.isna(x))] = new_ab.loc[
-            new_ab.index.intersection(df_ab.index)
-        ]
+        df_ab[df_ab.map(ITR.isna)] = new_ab.loc[new_ab.index.intersection(df_ab.index)]
         # DF_AB has gaps filled, but not whole new rows that did not exist before
         # Drop rows in NEW_AB already covered by DF_AB and consolidate
         new_ab.drop(index=df_ab.index, inplace=True, errors="ignore")
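
Reviewer note (not part of the commit): the hunks above address four separate
Pandas 2.1 deprecations. Below is a minimal, self-contained sketch of each
old-to-new pattern, assuming pandas>=2.1.0; the DataFrame and variable names
are illustrative only, not code from this repo.

    import io

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, 5.0, 6.0]})

    # Passing a literal JSON string to pd.read_json is deprecated; wrap it in a buffer.
    round_tripped = pd.read_json(io.StringIO(df.to_json(orient="split")), orient="split")

    # fillna(method="bfill") is deprecated; use the DataFrame.bfill()/ffill() methods.
    backfilled = df.bfill(axis=0)

    # rolling(..., axis=...) is deprecated; rolling now always runs along the index,
    # which suits the transposed ("_t") frames above, where years are the index.
    ratios = df.rolling(window=2, closed="right").apply(lambda w: w[1] / w[0], raw=True)

    # DataFrame.applymap is deprecated; DataFrame.map is the elementwise replacement.
    na_mask = df.map(pd.isna)

The deprecated spellings still execute under 2.1 but emit FutureWarnings.
DataFrame.map only exists from pandas 2.1.0 onward, hence the floor bump in
requirements.txt from pandas>=2.0.3 to pandas>=2.1.0.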