From 8b4fc564c9b7ca4ec47112235cb97d6e4df8938d Mon Sep 17 00:00:00 2001
From: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
Date: Wed, 20 Sep 2023 13:45:54 -0400
Subject: [PATCH] Address Pandas 2.1.0 deprecations

Merged against latest os-climate/main.

Signed-off-by: Michael Tiemann <72577720+MichaelTiemannOSC@users.noreply.github.com>
---
 examples/ITR_UI.py             |  4 ++--
 requirements.txt               |  2 +-
 src/ITR/data/base_providers.py | 31 +++++++++----------------------
 src/ITR/data/template.py       | 15 +++++----------
 4 files changed, 17 insertions(+), 35 deletions(-)

diff --git a/examples/ITR_UI.py b/examples/ITR_UI.py
index 918aaa05..89eb853d 100644
--- a/examples/ITR_UI.py
+++ b/examples/ITR_UI.py
@@ -1888,7 +1888,7 @@ def update_graph(
     changed_id = [p["prop_id"] for p in dash.callback_context.triggered][
         0
     ]  # to catch which widgets were pressed
-    amended_portfolio = pd.read_json(portfolio_json, orient="split")
+    amended_portfolio = pd.read_json(io.StringIO(initial_value=portfolio_json), orient="split")
     # Why does this get lost in translation?
     amended_portfolio.index.name = "company_id"
     amended_portfolio = amended_portfolio.assign(
@@ -1960,7 +1960,7 @@ def update_graph(
     scope_mask = amended_portfolio.scope == EScope[scope]
     filt_df = amended_portfolio[
         temp_score_mask & sec_mask & reg_mask & scope_mask
-    ]  # filtering
+    ].copy()  # filtering; copy makes it safe to modify column contents without getting warnings
     if len(filt_df) == 0:  # if after filtering the dataframe is empty
         # breakpoint()
         raise PreventUpdate
diff --git a/requirements.txt b/requirements.txt
index 109c1b43..277bfaa2 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,7 +13,7 @@ openpyxl==3.0.10
 openscm-units==0.5.2
 orca==1.8
 osc-ingest-tools>=0.4.3
-pandas>=2.0.3
+pandas>=2.1.0
 Pint>=0.22
 Pint-Pandas>=0.5
 psutil==5.9.5
diff --git a/src/ITR/data/base_providers.py b/src/ITR/data/base_providers.py
index 57d1260d..8bb95097 100644
--- a/src/ITR/data/base_providers.py
+++ b/src/ITR/data/base_providers.py
@@ -409,7 +409,6 @@ def _get_intensity_benchmarks(
         sec_reg_scopes = company_sector_region_scope[["sector", "region", "scope"]]
         if scope_to_calc is not None:
             sec_reg_scopes = sec_reg_scopes[sec_reg_scopes.scope.eq(scope_to_calc)]
-        sec_reg_scopes[~sec_reg_scopes.index.duplicated()]
         sec_reg_scopes_mi = pd.MultiIndex.from_frame(sec_reg_scopes).unique()
         bm_proj_t = self._EI_df_t.loc[
             range(
@@ -429,20 +428,12 @@ def _get_intensity_benchmarks(
         # This piece of work essentially does a column-based join (to avoid extra transpositions)
         result = pd.concat(
             [
-                pd.Series()
-                if x is None
-                else bm_proj_t[tuple(x[1])].rename((x[0], x[1][-1]))
-                for y in sec_reg_scopes.iterrows()
-                # In the happy path, we can use sector/region/scope index as-is
-                # In the less-happy path, we have to construct sector/'Global'/scope
-                # In the least happy path, we have to ditch the row because our benchmark does not cover it
-                for x in [
-                    y
-                    if tuple(y[1]) in bm_proj_t
-                    else (y[0], (y[1][0], "Global", y[1][2]))
-                    if (y[1][0], "Global", y[1][2]) in bm_proj_t
-                    else None
-                ]
+                bm_proj_t[tuple(ser)].rename((idx, ser.iloc[2]))
+                if tuple(ser) in bm_proj_t
+                else bm_proj_t[ser_global].rename((idx, ser.iloc[2]))
+                if (ser_global := (ser.iloc[0], "Global", ser.iloc[2])) in bm_proj_t
+                else pd.Series()
+                for idx, ser in sec_reg_scopes.iterrows()
             ],
             axis=1,
         ).dropna(axis=1, how="all")
@@ -1130,9 +1121,7 @@ def project_ei_trajectories(
                 )
             )
         else:
-            backfilled_t = historic_ei_t.apply(
-                lambda col: col.fillna(method="bfill")
-            )
+            backfilled_t = historic_ei_t.bfill(axis=0)
         # FIXME: this hack causes backfilling only on dates on or after the first year of the benchmark, which keeps it from disrupting current test cases
         # while also working on real-world use cases.  But we need to formalize this decision.
         backfilled_t = backfilled_t.reset_index()
@@ -1526,14 +1515,12 @@ def _get_trends(self, intensities_t: pd.DataFrame):
         intensities_t = intensities_t.apply(
             lambda col: col
             if col.dtype == np.float64
+            # Float64 NA needs to be converted to np.nan before we can apply nominal_values
             else ITR.nominal_values(col.fillna(np.nan)).astype(np.float64)
         )
-        # FIXME: Pandas 2.1
         # Treat NaN ratios as "unchanged year on year"
         # FIXME Could we ever have UFloat NaNs here?  np.nan is valid UFloat.
-        ratios_t: pd.DataFrame = intensities_t.rolling(
-            window=2, axis="index", closed="right"
-        ).apply(func=self._year_on_year_ratio, raw=True)
+        ratios_t: pd.DataFrame = intensities_t.rolling(window=2, closed="right").apply(func=self._year_on_year_ratio, raw=True)
         ratios_t = ratios_t.apply(
             lambda col: col.fillna(0) if all(col.map(lambda x: ITR.isna(x))) else col
         )
diff --git a/src/ITR/data/template.py b/src/ITR/data/template.py
index b029bd2f..999580d8 100644
--- a/src/ITR/data/template.py
+++ b/src/ITR/data/template.py
@@ -569,18 +569,15 @@ def _init_from_template_company_data(self, excel_path: str):
         ].ffill()

         # NA in exposure is how we drop rows we want to ignore
-        df = df[df.exposure.notna()]
-
-        # TODO: Fix market_cap column naming inconsistency
-        df.rename(
+        df = df[df.exposure.notna()].rename(
             columns={
                 "revenue": "company_revenue",
                 "market_cap": "company_market_cap",
                 "ev": "company_enterprise_value",
                 "evic": "company_ev_plus_cash",
                 "assets": "company_total_assets",
-            },
-            inplace=True,
+                # TODO: Fix market_cap column naming inconsistency
+            }
         )

         df.loc[df.region.isnull(), "region"] = df.country.map(ITR_country_to_region)
@@ -1388,7 +1385,7 @@ def _fixup_name(x):
         ].droplevel("metric")
         if prod_base_year.isna().any():
             logger.warning(
-                f"The following companies lack base year production info (will be ignored:\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
+                f"The following companies lack base year production info (will be ignored):\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
             )
         prod_base_year = prod_base_year[prod_base_year.notna()]
         prod_metrics = prod_base_year.map(lambda x: f"{x.u:~P}")
@@ -1505,9 +1502,7 @@ def fill_blank_or_missing_scopes(
         )
         new_ab.insert(0, "scope", scope_ab)
         new_ab.set_index("scope", append=True, inplace=True)
-        df_ab[df_ab.applymap(lambda x: ITR.isna(x))] = new_ab.loc[
-            new_ab.index.intersection(df_ab.index)
-        ]
+        df_ab[df_ab.map(ITR.isna)] = new_ab.loc[new_ab.index.intersection(df_ab.index)]
         # DF_AB has gaps filled, but not whole new rows that did not exist before
         # Drop rows in NEW_AB already covered by DF_AB and consolidate
         new_ab.drop(index=df_ab.index, inplace=True, errors="ignore")
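
Reviewer note (not part of the commit): the hunks above address four separate
Pandas 2.1 deprecations. Below is a minimal, self-contained sketch of each
old-to-new pattern, assuming pandas>=2.1.0; the DataFrame and variable names
are illustrative only, not code from this repo.

    import io

    import numpy as np
    import pandas as pd

    df = pd.DataFrame({"a": [1.0, np.nan, 3.0], "b": [np.nan, 5.0, 6.0]})

    # Passing a literal JSON string to pd.read_json is deprecated; wrap it in a buffer.
    round_tripped = pd.read_json(io.StringIO(df.to_json(orient="split")), orient="split")

    # fillna(method="bfill") is deprecated; use the DataFrame.bfill()/ffill() methods.
    backfilled = df.bfill(axis=0)

    # rolling(..., axis=...) is deprecated; rolling now always runs along the index,
    # which suits the transposed ("_t") frames above, where years are the index.
    ratios = df.rolling(window=2, closed="right").apply(lambda w: w[1] / w[0], raw=True)

    # DataFrame.applymap is deprecated; DataFrame.map is the elementwise replacement.
    na_mask = df.map(pd.isna)

The deprecated spellings still execute under 2.1 but emit FutureWarnings.
DataFrame.map only exists from pandas 2.1.0 onward, hence the floor bump in
requirements.txt from pandas>=2.0.3 to pandas>=2.1.0.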