diff --git a/parsers/IN.py b/parsers/IN.py index 33db9728a3..28e2272ab1 100644 --- a/parsers/IN.py +++ b/parsers/IN.py @@ -347,21 +347,37 @@ def fetch_consumption( ).to_list() -def format_ren_production_data(url: str, zone_key: str) -> dict[str, Any]: +def format_ren_production_data( + url: str, zone_key: str, target_datetime: datetime +) -> dict[str, Any]: """Formats daily renewable production data for each zone""" df_ren = pd.read_excel(url, engine="openpyxl", header=5, skipfooter=2) df_ren = df_ren.dropna(axis=0, how="all") - df_ren = df_ren.rename( - columns={ - df_ren.columns[1]: "region", - df_ren.columns[2]: "wind", - df_ren.columns[3]: "solar", - df_ren.columns[4]: "unknown", - } - ) + + # They changed format of the data from 2024/07/01 + if target_datetime < datetime(2024, 7, 1, 0, 0, tzinfo=IN_TZ): + df_ren = df_ren.rename( + columns={ + df_ren.columns[1]: "region", + df_ren.columns[2]: "wind", + df_ren.columns[3]: "solar", + df_ren.columns[4]: "unknown", + } + ) + else: + df_ren = df_ren.rename( + columns={ + df_ren.columns[0]: "region", + df_ren.columns[1]: "wind", + df_ren.columns[2]: "solar", + df_ren.columns[3]: "unknown", + } + ) + df_ren.loc[:, "zone_key"] = ( df_ren["region"].apply(lambda x: x if "Region" in x else np.nan).backfill() ) + df_ren["zone_key"] = df_ren["zone_key"].str.strip() df_ren["zone_key"] = df_ren["zone_key"].map(CEA_REGION_MAPPING) @@ -372,6 +388,7 @@ def format_ren_production_data(url: str, zone_key: str) -> dict[str, Any]: renewable_production = { key: round(zone_data.get(key) / CONVERSION_GWH_MW, 3) for key in zone_data.index } + return renewable_production @@ -395,6 +412,7 @@ def fetch_cea_production( for elem in all_data if target_datetime.strftime("%Y-%m-%d") in elem["date"] ] + if len(target_elem) > 0: if target_elem[0]["link"] == "file_not_found": raise ParserException( @@ -406,7 +424,7 @@ def fetch_cea_production( formatted_url = target_url.split("^")[0] r: Response = session.get(formatted_url) renewable_production = format_ren_production_data( - url=r.url, zone_key=zone_key + url=r.url, zone_key=zone_key, target_datetime=target_datetime ) return renewable_production else: