From 8b0ccfaff2c4f93bd383d6b73e5c299deee4c51f Mon Sep 17 00:00:00 2001 From: Electricity Maps Date: Mon, 19 Aug 2024 11:48:06 +0200 Subject: [PATCH 1/2] fix(parsers):India parser updated for new data format --- parsers/IN.py | 40 ++++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/parsers/IN.py b/parsers/IN.py index 33db9728a3..7fbf8ca817 100644 --- a/parsers/IN.py +++ b/parsers/IN.py @@ -347,31 +347,46 @@ def fetch_consumption( ).to_list() -def format_ren_production_data(url: str, zone_key: str) -> dict[str, Any]: +def format_ren_production_data(url: str, zone_key: str, target_datetime: datetime) -> dict[str, Any]: """Formats daily renewable production data for each zone""" df_ren = pd.read_excel(url, engine="openpyxl", header=5, skipfooter=2) df_ren = df_ren.dropna(axis=0, how="all") - df_ren = df_ren.rename( - columns={ - df_ren.columns[1]: "region", - df_ren.columns[2]: "wind", - df_ren.columns[3]: "solar", - df_ren.columns[4]: "unknown", - } - ) + + #They changed format of the data from 2024/07/01 + if target_datetime < datetime(2024, 7, 1, 0, 0, tzinfo=IN_TZ): + df_ren = df_ren.rename( + columns={ + df_ren.columns[1]: "region", + df_ren.columns[2]: "wind", + df_ren.columns[3]: "solar", + df_ren.columns[4]: "unknown", + } + ) + else: + df_ren = df_ren.rename( + columns={ + df_ren.columns[0]: "region", + df_ren.columns[1]: "wind", + df_ren.columns[2]: "solar", + df_ren.columns[3]: "unknown", + } + ) + df_ren.loc[:, "zone_key"] = ( df_ren["region"].apply(lambda x: x if "Region" in x else np.nan).backfill() ) + df_ren["zone_key"] = df_ren["zone_key"].str.strip() df_ren["zone_key"] = df_ren["zone_key"].map(CEA_REGION_MAPPING) - + zone_data = df_ren.loc[ (df_ren.zone_key == zone_key) & (~df_ren.region.str.contains("Region")) ][["wind", "solar", "unknown"]].sum() - + renewable_production = { key: round(zone_data.get(key) / CONVERSION_GWH_MW, 3) for key in zone_data.index } + return renewable_production @@ -395,6 +410,7 @@ def fetch_cea_production( for elem in all_data if target_datetime.strftime("%Y-%m-%d") in elem["date"] ] + if len(target_elem) > 0: if target_elem[0]["link"] == "file_not_found": raise ParserException( @@ -406,7 +422,7 @@ def fetch_cea_production( formatted_url = target_url.split("^")[0] r: Response = session.get(formatted_url) renewable_production = format_ren_production_data( - url=r.url, zone_key=zone_key + url=r.url, zone_key=zone_key, target_datetime=target_datetime ) return renewable_production else: From 8eeff54a806580fd3dfd2d341e6a91a05a50cbf0 Mon Sep 17 00:00:00 2001 From: Electricity Maps Date: Mon, 19 Aug 2024 11:57:51 +0200 Subject: [PATCH 2/2] fix(parsers):Reformatted --- parsers/IN.py | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/parsers/IN.py b/parsers/IN.py index 7fbf8ca817..28e2272ab1 100644 --- a/parsers/IN.py +++ b/parsers/IN.py @@ -347,12 +347,14 @@ def fetch_consumption( ).to_list() -def format_ren_production_data(url: str, zone_key: str, target_datetime: datetime) -> dict[str, Any]: +def format_ren_production_data( + url: str, zone_key: str, target_datetime: datetime +) -> dict[str, Any]: """Formats daily renewable production data for each zone""" df_ren = pd.read_excel(url, engine="openpyxl", header=5, skipfooter=2) df_ren = df_ren.dropna(axis=0, how="all") - - #They changed format of the data from 2024/07/01 + + # They changed format of the data from 2024/07/01 if target_datetime < datetime(2024, 7, 1, 0, 0, tzinfo=IN_TZ): df_ren = df_ren.rename( columns={ @@ -371,22 +373,22 @@ def format_ren_production_data(url: str, zone_key: str, target_datetime: datetim df_ren.columns[3]: "unknown", } ) - + df_ren.loc[:, "zone_key"] = ( df_ren["region"].apply(lambda x: x if "Region" in x else np.nan).backfill() ) - + df_ren["zone_key"] = df_ren["zone_key"].str.strip() df_ren["zone_key"] = df_ren["zone_key"].map(CEA_REGION_MAPPING) - + zone_data = df_ren.loc[ (df_ren.zone_key == zone_key) & (~df_ren.region.str.contains("Region")) ][["wind", "solar", "unknown"]].sum() - + renewable_production = { key: round(zone_data.get(key) / CONVERSION_GWH_MW, 3) for key in zone_data.index } - + return renewable_production @@ -410,7 +412,7 @@ def fetch_cea_production( for elem in all_data if target_datetime.strftime("%Y-%m-%d") in elem["date"] ] - + if len(target_elem) > 0: if target_elem[0]["link"] == "file_not_found": raise ParserException(