Integrate JRC processing script and remove raw_data

Co-authored with Ivan Ruiz Manuel (irm-codebase)
calliope-project · Apr 12, 2024 · 87dfea5 · 87dfea5
1 parent 3f3048f
commit 87dfea5
Show file tree

Hide file tree

Showing 10 changed files with 43 additions and 18 deletions.
diff --git a/envs/default.yaml b/envs/default.yaml
@@ -12,7 +12,8 @@ dependencies:
     - pycountry=18.12.8
     - jinja2=2.11.3
     - pip=21.0.1
-    - xarray=0.17.0
+    - xarray=2022.9.0
+    - scipy=1.5.3
     - netCDF4=1.5.6
     - pip:
         - -e ./lib

diff --git a/modules/industry/config.yaml b/modules/industry/config.yaml
@@ -3,6 +3,8 @@ industry:
         path-energy-balances: build/data/annual-energy-balances.csv
         path-cat-names: config/energy-balances/energy-balance-category-names.csv
         path-carrier-names: config/energy-balances/energy-balance-carrier-names.csv
+        path-jrc-industry-energy: build/data/jrc-idees/industry/processed-energy.nc
+        path-jrc-industry-production: build/data/jrc-idees/industry/processed-production.nc
     outputs:
         placeholder-out1:
         placeholder-out2:

diff --git a/modules/industry/env_industry.yaml b/modules/industry/env_industry.yaml
@@ -9,6 +9,9 @@ dependencies:
     - pandas=1.2.3
     - pycountry=18.12.8
     - snakemake-minimal=7.26.0
+    - netCDF4=1.5.6
+    - xarray=2022.9.0
+    - scipy=1.5.3
     - pip:
         - styleframe==4.2
         - -e ./lib
diff --git a/modules/industry/industry.smk b/modules/industry/industry.smk
@@ -22,8 +22,8 @@ rule steel_industry:
         path_energy_balances = config["inputs"]["path-energy-balances"],
         path_cat_names = config["inputs"]["path-cat-names"],
         path_carrier_names = config["inputs"]["path-carrier-names"],
-        path_jrc_energy = f"{DATA_PATH}/jrc_idees_processed_energy.csv.gz",
-        path_jrc_production = f"{DATA_PATH}/jrc_idees_processed_production.csv.gz",
+        path_jrc_industry_energy = config["inputs"]["path-jrc-industry-energy"],
+        path_jrc_industry_production = config["inputs"]["path-jrc-industry-production"],
     output:
         path_output = f"{BUILD_PATH}/annual_demand_steel.csv"
     script: f"{SCRIPT_PATH}/steel_industry.py"

diff --git a/modules/industry/raw_data/jrc_idees_processed_energy.csv.gz b/modules/industry/raw_data/jrc_idees_processed_energy.csv.gz
diff --git a/modules/industry/raw_data/jrc_idees_processed_production.csv.gz b/modules/industry/raw_data/jrc_idees_processed_production.csv.gz
diff --git a/modules/industry/schema.yaml b/modules/industry/schema.yaml
@@ -27,6 +27,14 @@ properties:
                         description: |
                             Carrier mapping file.
                             Columns [carrier_code,carrier_name,hh_carrier_name,com_carrier_name,ind_carrier_name,oth_carrier_name].
+                    path-jrc-industry-energy:
+                        type: string
+                        description: |
+                            Processed JRC-IDEES industry energy demand data (netCDF .nc file).
+                    path-jrc-industry-production:
+                        type: string
+                        description: |
+                            Processed JRC-IDEES industrial production data (netCDF .nc file).
             outputs:
                 type: object
                 description: Outputs are paths for the files produced by the module.

diff --git a/modules/industry/scripts/steel_industry.py b/modules/industry/scripts/steel_industry.py
@@ -2,13 +2,14 @@
 
 import eurocalliopelib.utils as ec_utils
 import pandas as pd
+import xarray as xr
 from utils import formatting
 from utils import jrc_idees_parser as jrc
 
 CAT_NAME_STEEL = "Iron and steel"
 
-H2_LHV_KTOE = 2.863  # 0.0333 TWh/kt LHV -> 2.863ktoe/kt
-HDRI_CONSUMPTION = 0.0116  # H-DRI: 135kWh_e/t = 0.0116ktoe/kt
+H2_LHV_KTOE = 0.0333  # H2 LHV in TWh/kt (the "_KTOE" suffix is legacy)
+HDRI_CONSUMPTION = 135e-6  # H-DRI: 135 kWh_e/t = 135e-6 TWh/kt
 
 
 def _get_h2_to_steel(recycled_steel_share: float) -> float:
@@ -23,8 +24,8 @@ def get_steel_demand_df(
     path_energy_balances: str,
     path_cat_names: str,
     path_carrier_names: str,
-    path_jrc_energy: str,
-    path_jrc_production: str,
+    path_jrc_industry_energy: str,
+    path_jrc_industry_production: str,
     path_output: Optional[str] = None,
 ) -> pd.DataFrame:
     """Execute the data processing pipeline for the "Iron and steel" sub-sector.
@@ -34,8 +35,8 @@ def get_steel_demand_df(
         path_energy_balances (str): country energy balances (usually from eurostat).
         path_cat_names (str): eurostat category mapping file.
         path_carrier_names (str): eurostat carrier name mapping file.
-        path_jrc_energy (str): jrc country-specific industrial energy demand file.
-        path_jrc_production (str): jrc country-specific industrial production file.
+        path_jrc_industry_energy (str): jrc country-specific industrial energy demand file.
+        path_jrc_industry_production (str): jrc country-specific industrial production file.
         path_output (str): location of steel demand output file.
 
     Returns:
@@ -49,8 +50,18 @@ def get_steel_demand_df(
     )
     cat_names_df = pd.read_csv(path_cat_names, header=0, index_col=0)
     carrier_names_df = pd.read_csv(path_carrier_names, header=0, index_col=0)
-    energy_df = pd.read_csv(path_jrc_energy, index_col=[0, 1, 2, 3, 4, 5, 6])
-    prod_df = pd.read_csv(path_jrc_production, index_col=[0, 1, 2, 3])
+
+    energy_df = xr.open_dataset(path_jrc_industry_energy).to_dataframe().unstack("year")
+    energy_df["unit"] = "twh"
+    energy_df = energy_df.set_index("unit", append=True)
+    energy_df.columns = energy_df.columns.droplevel()
+    prod_df = (
+        xr.open_dataset(path_jrc_industry_production).to_dataframe().unstack("year")
+    )
+    prod_df["unit"] = "twh"
+    prod_df = prod_df.set_index("unit", append=True)
+    prod_df.columns = prod_df.columns.droplevel()
+
     # Ensure dataframes only have data specific to this industry
     cat_names_df = cat_names_df[cat_names_df["jrc_idees"] == CAT_NAME_STEEL]
     energy_df = energy_df.xs(CAT_NAME_STEEL, level="cat_name", drop_level=False)
@@ -211,7 +222,7 @@ def process_steel_energy_consumption(
         .assign(carrier="space_heat")
         .set_index("carrier", append=True)
         .sum(level=total_specific_consumption.index.names)
-        .rename(index={"ktoe": "ktoe/kt"})
+        .rename(index={"twh": "twh/kt"})
     )
     total_specific_consumption = total_specific_consumption.append(
         space_heat_specific_demand
@@ -220,7 +231,7 @@ def process_steel_energy_consumption(
     steel_consumption = total_specific_consumption.mul(
         jrc_prod_df.xs("Iron and steel", level="cat_name").sum(level="country_code"),
         level="country_code",
-    ).rename(index={"ktoe/kt": "ktoe"})
+    ).rename(index={"twh/kt": "twh"})
 
     return steel_consumption
 
@@ -232,7 +243,7 @@ def process_steel_energy_consumption(
         path_energy_balances=snakemake.input.path_energy_balances,
         path_cat_names=snakemake.input.path_cat_names,
         path_carrier_names=snakemake.input.path_carrier_names,
-        path_jrc_energy=snakemake.input.path_jrc_energy,
-        path_jrc_production=snakemake.input.path_jrc_production,
+        path_jrc_industry_energy=snakemake.input.path_jrc_industry_energy,
+        path_jrc_industry_production=snakemake.input.path_jrc_industry_production,
         path_output=snakemake.output.path_output,
     )
diff --git a/modules/industry/scripts/utils/formatting.py b/modules/industry/scripts/utils/formatting.py
@@ -49,7 +49,7 @@ def fill_missing_data(
         .sum(level="cat_code", min_count=1)
         .stack("country")
         .rename_axis(index=["cat_name", "country_code"])
-        .apply(ec_utils.tj_to_ktoe)
+        .apply(ec_utils.tj_to_twh)
     )
 
     country_codes = subsector_energy_consumption_df.index.get_level_values(

diff --git a/modules/industry/scripts/utils/jrc_idees_parser.py b/modules/industry/scripts/utils/jrc_idees_parser.py
@@ -15,7 +15,7 @@ def get_auxiliary_electricity_consumption(
     )
     specific_consumption = consumption.div(jrc_prod_df.loc[process].droplevel("unit"))
     specific_consumption.index = specific_consumption.index.set_levels(
-        ["ktoe/kt"], level="unit"
+        ["twh/kt"], level="unit"
     )
     return specific_consumption.fillna(0)
 
@@ -45,7 +45,7 @@ def get_specific_electricity_consumption(
     )
 
     specific_consumption = specific_demand.div(electrical_efficiency).rename(
-        index={"ktoe": "ktoe/kt"}
+        index={"twh": "twh/kt"}
     )
     assert (
         (