diff --git a/envs/default.yaml b/envs/default.yaml index d74f6f58..98e4ce32 100644 --- a/envs/default.yaml +++ b/envs/default.yaml @@ -12,7 +12,8 @@ dependencies: - pycountry=18.12.8 - jinja2=2.11.3 - pip=21.0.1 - - xarray=0.17.0 + - xarray=2022.9.0 + - scipy=1.5.3 - netCDF4=1.5.6 - pip: - -e ./lib diff --git a/modules/industry/config.yaml b/modules/industry/config.yaml index c5daf5ca..9780709e 100644 --- a/modules/industry/config.yaml +++ b/modules/industry/config.yaml @@ -3,6 +3,8 @@ industry: path-energy-balances: build/data/annual-energy-balances.csv path-cat-names: config/energy-balances/energy-balance-category-names.csv path-carrier-names: config/energy-balances/energy-balance-carrier-names.csv + path-jrc-industry-energy: build/data/jrc-idees/industry/processed-energy.nc + path-jrc-industry-production: build/data/jrc-idees/industry/processed-production.nc outputs: placeholder-out1: placeholder-out2: diff --git a/modules/industry/env_industry.yaml b/modules/industry/env_industry.yaml index 792e9ef9..1b9052e3 100644 --- a/modules/industry/env_industry.yaml +++ b/modules/industry/env_industry.yaml @@ -9,6 +9,9 @@ dependencies: - pandas=1.2.3 - pycountry=18.12.8 - snakemake-minimal=7.26.0 + - netCDF4=1.5.6 + - xarray=2022.9.0 + - scipy=1.5.3 - pip: - styleframe==4.2 - -e ./lib diff --git a/modules/industry/industry.smk b/modules/industry/industry.smk index d61487ad..b4d61632 100644 --- a/modules/industry/industry.smk +++ b/modules/industry/industry.smk @@ -22,8 +22,8 @@ rule steel_industry: path_energy_balances = config["inputs"]["path-energy-balances"], path_cat_names = config["inputs"]["path-cat-names"], path_carrier_names = config["inputs"]["path-carrier-names"], - path_jrc_energy = f"{DATA_PATH}/jrc_idees_processed_energy.csv.gz", - path_jrc_production = f"{DATA_PATH}/jrc_idees_processed_production.csv.gz", + path_jrc_industry_energy = config["inputs"]["path-jrc-industry-energy"], + path_jrc_industry_production = config["inputs"]["path-jrc-industry-production"], output: path_output = f"{BUILD_PATH}/annual_demand_steel.csv" script: f"{SCRIPT_PATH}/steel_industry.py" diff --git a/modules/industry/raw_data/jrc_idees_processed_energy.csv.gz b/modules/industry/raw_data/jrc_idees_processed_energy.csv.gz deleted file mode 100644 index 89efd8b6..00000000 Binary files a/modules/industry/raw_data/jrc_idees_processed_energy.csv.gz and /dev/null differ diff --git a/modules/industry/raw_data/jrc_idees_processed_production.csv.gz b/modules/industry/raw_data/jrc_idees_processed_production.csv.gz deleted file mode 100644 index cbc1c55b..00000000 Binary files a/modules/industry/raw_data/jrc_idees_processed_production.csv.gz and /dev/null differ diff --git a/modules/industry/schema.yaml b/modules/industry/schema.yaml index f047e920..642f35dd 100644 --- a/modules/industry/schema.yaml +++ b/modules/industry/schema.yaml @@ -27,6 +27,14 @@ properties: description: | Carrier mapping file. Columns [carrier_code,carrier_name,hh_carrier_name,com_carrier_name,ind_carrier_name,oth_carrier_name]. + path-jrc-industry-energy: + type: string + description: | + JRC processed industry energy demand .nc file. + path-jrc-industry-production: + type: string + description: | + JRC processed industrial production .nc file. outputs: type: object description: Outputs are paths for the files produced by the module. diff --git a/modules/industry/scripts/steel_industry.py b/modules/industry/scripts/steel_industry.py index c2b352af..e081ea1b 100644 --- a/modules/industry/scripts/steel_industry.py +++ b/modules/industry/scripts/steel_industry.py @@ -2,13 +2,14 @@ import eurocalliopelib.utils as ec_utils import pandas as pd +import xarray as xr from utils import formatting from utils import jrc_idees_parser as jrc CAT_NAME_STEEL = "Iron and steel" -H2_LHV_KTOE = 2.863 # 0.0333 TWh/kt LHV -> 2.863ktoe/kt -HDRI_CONSUMPTION = 0.0116 # H-DRI: 135kWh_e/t = 0.0116ktoe/kt +H2_LHV_KTOE = 0.0333 # 0.0333 TWh/kt LHV +HDRI_CONSUMPTION = 135e-6 # H-DRI: 135kWh_e/t def _get_h2_to_steel(recycled_steel_share: float) -> float: @@ -23,8 +24,8 @@ def get_steel_demand_df( path_energy_balances: str, path_cat_names: str, path_carrier_names: str, - path_jrc_energy: str, - path_jrc_production: str, + path_jrc_industry_energy: str, + path_jrc_industry_production: str, path_output: Optional[str] = None, ) -> pd.DataFrame: """Execute the data processing pipeline for the "Iron and steel" sub-sector. @@ -34,8 +35,8 @@ def get_steel_demand_df( path_energy_balances (str): country energy balances (usually from eurostat). path_cat_names (str): eurostat category mapping file. path_carrier_names (str): eurostat carrier name mapping file. - path_jrc_energy (str): jrc country-specific industrial energy demand file. - path_jrc_production (str): jrc country-specific industrial production file. + path_jrc_industry_energy (str): jrc country-specific industrial energy demand file. + path_jrc_industry_production (str): jrc country-specific industrial production file. path_output (str): location of steel demand output file. Returns: @@ -49,8 +50,18 @@ def get_steel_demand_df( ) cat_names_df = pd.read_csv(path_cat_names, header=0, index_col=0) carrier_names_df = pd.read_csv(path_carrier_names, header=0, index_col=0) - energy_df = pd.read_csv(path_jrc_energy, index_col=[0, 1, 2, 3, 4, 5, 6]) - prod_df = pd.read_csv(path_jrc_production, index_col=[0, 1, 2, 3]) + + energy_df = xr.open_dataset(path_jrc_industry_energy).to_dataframe().unstack("year") + energy_df["unit"] = "twh" + energy_df = energy_df.set_index("unit", append=True) + energy_df.columns = energy_df.columns.droplevel() + prod_df = ( + xr.open_dataset(path_jrc_industry_production).to_dataframe().unstack("year") + ) + prod_df["unit"] = "twh" + prod_df = prod_df.set_index("unit", append=True) + prod_df.columns = prod_df.columns.droplevel() + # Ensure dataframes only have data specific to this industry cat_names_df = cat_names_df[cat_names_df["jrc_idees"] == CAT_NAME_STEEL] energy_df = energy_df.xs(CAT_NAME_STEEL, level="cat_name", drop_level=False) @@ -211,7 +222,7 @@ def process_steel_energy_consumption( .assign(carrier="space_heat") .set_index("carrier", append=True) .sum(level=total_specific_consumption.index.names) - .rename(index={"ktoe": "ktoe/kt"}) + .rename(index={"twh": "twh/kt"}) ) total_specific_consumption = total_specific_consumption.append( space_heat_specific_demand @@ -220,7 +231,7 @@ def process_steel_energy_consumption( steel_consumption = total_specific_consumption.mul( jrc_prod_df.xs("Iron and steel", level="cat_name").sum(level="country_code"), level="country_code", - ).rename(index={"ktoe/kt": "ktoe"}) + ).rename(index={"twh/kt": "twh"}) return steel_consumption @@ -232,7 +243,7 @@ def process_steel_energy_consumption( path_energy_balances=snakemake.input.path_energy_balances, path_cat_names=snakemake.input.path_cat_names, path_carrier_names=snakemake.input.path_carrier_names, - path_jrc_energy=snakemake.input.path_jrc_energy, - path_jrc_production=snakemake.input.path_jrc_production, + path_jrc_industry_energy=snakemake.input.path_jrc_industry_energy, + path_jrc_industry_production=snakemake.input.path_jrc_industry_production, path_output=snakemake.output.path_output, ) diff --git a/modules/industry/scripts/utils/formatting.py b/modules/industry/scripts/utils/formatting.py index 0fcb153d..6cc98384 100644 --- a/modules/industry/scripts/utils/formatting.py +++ b/modules/industry/scripts/utils/formatting.py @@ -49,7 +49,7 @@ def fill_missing_data( .sum(level="cat_code", min_count=1) .stack("country") .rename_axis(index=["cat_name", "country_code"]) - .apply(ec_utils.tj_to_ktoe) + .apply(ec_utils.tj_to_twh) ) country_codes = subsector_energy_consumption_df.index.get_level_values( diff --git a/modules/industry/scripts/utils/jrc_idees_parser.py b/modules/industry/scripts/utils/jrc_idees_parser.py index e30070ca..8c1c6753 100644 --- a/modules/industry/scripts/utils/jrc_idees_parser.py +++ b/modules/industry/scripts/utils/jrc_idees_parser.py @@ -15,7 +15,7 @@ def get_auxiliary_electricity_consumption( ) specific_consumption = consumption.div(jrc_prod_df.loc[process].droplevel("unit")) specific_consumption.index = specific_consumption.index.set_levels( - ["ktoe/kt"], level="unit" + ["twh/kt"], level="unit" ) return specific_consumption.fillna(0) @@ -45,7 +45,7 @@ def get_specific_electricity_consumption( ) specific_consumption = specific_demand.div(electrical_efficiency).rename( - index={"ktoe": "ktoe/kt"} + index={"twh": "twh/kt"} ) assert ( (