Address Pandas 2.1.0 deprecations #255

Merged 2 commits on Sep 20, 2023
4 changes: 2 additions & 2 deletions examples/ITR_UI.py
@@ -1888,7 +1888,7 @@ def update_graph(
changed_id = [p["prop_id"] for p in dash.callback_context.triggered][
0
] # to catch which widgets were pressed
amended_portfolio = pd.read_json(portfolio_json, orient="split")
amended_portfolio = pd.read_json(io.StringIO(initial_value=portfolio_json), orient="split")
# Why does this get lost in translation?
amended_portfolio.index.name = "company_id"
amended_portfolio = amended_portfolio.assign(
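For context, pandas 2.1.0 deprecates passing a literal JSON string to `pd.read_json`; the fix above wraps the string in a file-like `io.StringIO` object. A minimal sketch of the round trip (data and names are illustrative):

```python
import io

import pandas as pd

df = pd.DataFrame({"value": [1.0, 2.0]}, index=["c1", "c2"])
payload = df.to_json(orient="split")  # serialize to a JSON string

# Passing the raw string to read_json now raises a FutureWarning;
# wrapping it in StringIO presents it as a file-like object instead.
roundtripped = pd.read_json(io.StringIO(payload), orient="split")
```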
@@ -1960,7 +1960,7 @@ def update_graph(
scope_mask = amended_portfolio.scope == EScope[scope]
filt_df = amended_portfolio[
temp_score_mask & sec_mask & reg_mask & scope_mask
] # filtering
].copy() # filtering; copy makes it safe to modify column contents without getting warnings
if len(filt_df) == 0: # if after filtering the dataframe is empty
# breakpoint()
raise PreventUpdate
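The added `.copy()` addresses pandas' chained-assignment behavior: assigning to a column of a boolean-filtered view can raise SettingWithCopyWarning. A small sketch of the pattern, with made-up data:

```python
import pandas as pd

df = pd.DataFrame({"region": ["EU", "US", "EU"], "score": [1.0, 2.0, 3.0]})

# .copy() makes filt_df an independent frame, so later column
# assignments do not warn about writing to a view of df.
filt_df = df[df.region == "EU"].copy()
filt_df["score"] = filt_df["score"] * 2  # safe, no SettingWithCopyWarning
```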
2 changes: 1 addition & 1 deletion requirements.txt
@@ -13,7 +13,7 @@ openpyxl==3.0.10
openscm-units==0.5.2
orca==1.8
osc-ingest-tools>=0.4.3
pandas>=2.0.3
pandas>=2.1.0
Pint>=0.22
Pint-Pandas>=0.5
psutil==5.9.5
31 changes: 9 additions & 22 deletions src/ITR/data/base_providers.py
@@ -409,7 +409,6 @@ def _get_intensity_benchmarks(
sec_reg_scopes = company_sector_region_scope[["sector", "region", "scope"]]
if scope_to_calc is not None:
sec_reg_scopes = sec_reg_scopes[sec_reg_scopes.scope.eq(scope_to_calc)]
sec_reg_scopes[~sec_reg_scopes.index.duplicated()]
sec_reg_scopes_mi = pd.MultiIndex.from_frame(sec_reg_scopes).unique()
bm_proj_t = self._EI_df_t.loc[
range(
@@ -429,20 +428,12 @@
# This piece of work essentially does a column-based join (to avoid extra transpositions)
result = pd.concat(
[
pd.Series()
if x is None
else bm_proj_t[tuple(x[1])].rename((x[0], x[1][-1]))
for y in sec_reg_scopes.iterrows()
# In the happy path, we can use sector/region/scope index as-is
# In the less-happy path, we have to construct sector/'Global'/scope
# In the least happy path, we have to ditch the row because our benchmark does not cover it
for x in [
y
if tuple(y[1]) in bm_proj_t
else (y[0], (y[1][0], "Global", y[1][2]))
if (y[1][0], "Global", y[1][2]) in bm_proj_t
else None
]
bm_proj_t[tuple(ser)].rename((idx, ser.iloc[2]))
if tuple(ser) in bm_proj_t
else bm_proj_t[ser_global].rename((idx, ser.iloc[2]))
if (ser_global:=(ser.iloc[0], "Global", ser.iloc[2],)) in bm_proj_t
else pd.Series()
for idx, ser in sec_reg_scopes.iterrows()
],
axis=1,
).dropna(axis=1, how="all")
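The rewritten comprehension keeps the same three-way fallback but names things directly: the exact (sector, region, scope) column if the benchmark has it, otherwise the same sector and scope under "Global", otherwise an empty Series that the trailing `dropna` discards. A self-contained sketch of the same lookup pattern against a toy benchmark (all data illustrative):

```python
import pandas as pd

# Toy benchmark: columns keyed by (sector, region, scope) tuples.
bm_proj_t = pd.DataFrame({
    ("Steel", "Europe", "S1"): [1.0, 0.9],
    ("Steel", "Global", "S1"): [1.2, 1.1],
})
sec_reg_scopes = pd.DataFrame(
    [("Steel", "Europe", "S1"), ("Steel", "Asia", "S1"), ("Autos", "Asia", "S1")],
    columns=["sector", "region", "scope"],
)

result = pd.concat(
    [
        # Happy path: the exact sector/region/scope column exists.
        bm_proj_t[tuple(ser)].rename((idx, ser.iloc[2]))
        if tuple(ser) in bm_proj_t
        # Fallback: substitute 'Global' for the region; the walrus
        # operator binds ser_global for use in the branch above it.
        else bm_proj_t[ser_global].rename((idx, ser.iloc[2]))
        if (ser_global := (ser.iloc[0], "Global", ser.iloc[2])) in bm_proj_t
        # No coverage at all: an empty Series, removed by dropna below.
        else pd.Series(dtype=float)
        for idx, ser in sec_reg_scopes.iterrows()
    ],
    axis=1,
).dropna(axis=1, how="all")
```

Note the chained conditional evaluates its tests top to bottom, so the walrus assignment in the second test runs before the `bm_proj_t[ser_global]` branch can be taken.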
@@ -1130,9 +1121,7 @@ def project_ei_trajectories(
)
)
else:
backfilled_t = historic_ei_t.apply(
lambda col: col.fillna(method="bfill")
)
backfilled_t = historic_ei_t.bfill(axis=0)
# FIXME: this hack causes backfilling only on dates on or after the first year of the benchmark, which keeps it from disrupting current test cases
# while also working on real-world use cases. But we need to formalize this decision.
backfilled_t = backfilled_t.reset_index()
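`fillna(method="bfill")` is deprecated in pandas 2.1.0 in favor of the dedicated `bfill`/`ffill` methods, and calling `bfill` on the whole frame also drops the per-column `apply`. A minimal before/after sketch:

```python
import numpy as np
import pandas as pd

historic_ei_t = pd.DataFrame({"c1": [np.nan, 2.0, np.nan], "c2": [np.nan, np.nan, 3.0]})

# Deprecated: historic_ei_t.apply(lambda col: col.fillna(method="bfill"))
backfilled_t = historic_ei_t.bfill(axis=0)  # fill each NaN from the next valid value down the column
```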
@@ -1526,14 +1515,12 @@ def _get_trends(self, intensities_t: pd.DataFrame):
intensities_t = intensities_t.apply(
lambda col: col
if col.dtype == np.float64
# Float64 NA needs to be converted to np.nan before we can apply nominal_values
else ITR.nominal_values(col.fillna(np.nan)).astype(np.float64)
)
# FIXME: Pandas 2.1
# Treat NaN ratios as "unchanged year on year"
# FIXME Could we ever have UFloat NaNs here? np.nan is valid UFloat.
ratios_t: pd.DataFrame = intensities_t.rolling(
window=2, axis="index", closed="right"
).apply(func=self._year_on_year_ratio, raw=True)
ratios_t: pd.DataFrame = intensities_t.rolling(window=2, closed="right").apply(func=self._year_on_year_ratio, raw=True)
ratios_t = ratios_t.apply(
lambda col: col.fillna(0) if all(col.map(lambda x: ITR.isna(x))) else col
)
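Similarly, pandas 2.1.0 deprecates the `axis` keyword on `rolling`; since `axis="index"` was already the default, dropping it preserves behavior. A sketch with a stand-in ratio function (the real `_year_on_year_ratio` lives on the provider class):

```python
import pandas as pd

def year_on_year_ratio(window):
    # Stand-in for self._year_on_year_ratio: fractional change across
    # a two-element window (raw=True passes a bare numpy array).
    return window[1] / window[0] - 1.0

intensities_t = pd.DataFrame({"c1": [1.0, 1.1, 1.21]})
ratios_t = intensities_t.rolling(window=2, closed="right").apply(
    func=year_on_year_ratio, raw=True
)  # first row is NaN: no complete window yet
```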
15 changes: 5 additions & 10 deletions src/ITR/data/template.py
@@ -569,18 +569,15 @@ def _init_from_template_company_data(self, excel_path: str):
].ffill()

# NA in exposure is how we drop rows we want to ignore
df = df[df.exposure.notna()]

# TODO: Fix market_cap column naming inconsistency
df.rename(
df = df[df.exposure.notna()].rename(
columns={
"revenue": "company_revenue",
"market_cap": "company_market_cap",
"ev": "company_enterprise_value",
"evic": "company_ev_plus_cash",
"assets": "company_total_assets",
},
inplace=True,
# TODO: Fix market_cap column naming inconsistency
}
)
df.loc[df.region.isnull(), "region"] = df.country.map(ITR_country_to_region)
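Chaining `rename` onto the filtered frame, instead of calling `rename(..., inplace=True)` afterwards, returns a fresh DataFrame rather than mutating what may be a view of `df`; in-place mutation of a filtered slice is the pattern pandas 2.x copy-on-write is retiring. A compact sketch:

```python
import pandas as pd

df = pd.DataFrame({"exposure": [0.5, None], "revenue": [10.0, 20.0]})

# Filter and rename in one chain; no in-place mutation of a slice.
df = df[df.exposure.notna()].rename(columns={"revenue": "company_revenue"})
```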

@@ -1388,7 +1385,7 @@ def _fixup_name(x):
].droplevel("metric")
if prod_base_year.isna().any():
logger.warning(
f"The following companies lack base year production info (will be ignored:\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
f"The following companies lack base year production info (will be ignored):\n{prod_base_year[prod_base_year.isna()].index.to_list()}"
)
prod_base_year = prod_base_year[prod_base_year.notna()]
prod_metrics = prod_base_year.map(lambda x: f"{x.u:~P}")
@@ -1505,9 +1502,7 @@ def fill_blank_or_missing_scopes(
)
new_ab.insert(0, "scope", scope_ab)
new_ab.set_index("scope", append=True, inplace=True)
df_ab[df_ab.applymap(lambda x: ITR.isna(x))] = new_ab.loc[
new_ab.index.intersection(df_ab.index)
]
df_ab[df_ab.map(ITR.isna)] = new_ab.loc[new_ab.index.intersection(df_ab.index)]
# DF_AB has gaps filled, but not whole new rows that did not exist before
# Drop rows in NEW_AB already covered by DF_AB and consolidate
new_ab.drop(index=df_ab.index, inplace=True, errors="ignore")
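`DataFrame.applymap` was deprecated in pandas 2.1.0 and renamed to `DataFrame.map`; passing `ITR.isna` directly also drops the redundant lambda. A sketch with `pd.isna` standing in for `ITR.isna`:

```python
import numpy as np
import pandas as pd

df_ab = pd.DataFrame({"a": [1.0, np.nan], "b": [np.nan, 2.0]})

# pandas 2.1.0: DataFrame.applymap is deprecated, renamed to DataFrame.map.
na_mask = df_ab.map(pd.isna)  # elementwise; was df_ab.applymap(lambda x: pd.isna(x))
```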