Skip to content

Commit

Permalink
Integrate JRC processing script, removed raw_data
Browse files Browse the repository at this point in the history
Co-authored with Ivan Ruiz Manuel (irm-codebase)
  • Loading branch information
Meijun Chen committed Apr 12, 2024
1 parent 3f3048f commit 87dfea5
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 18 deletions.
3 changes: 2 additions & 1 deletion envs/default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ dependencies:
- pycountry=18.12.8
- jinja2=2.11.3
- pip=21.0.1
- xarray=0.17.0
- xarray=2022.9.0
- scipy=1.5.3
- netCDF4=1.5.6
- pip:
- -e ./lib
Expand Down
2 changes: 2 additions & 0 deletions modules/industry/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ industry:
path-energy-balances: build/data/annual-energy-balances.csv
path-cat-names: config/energy-balances/energy-balance-category-names.csv
path-carrier-names: config/energy-balances/energy-balance-carrier-names.csv
path-jrc-industry-energy: build/data/jrc-idees/industry/processed-energy.nc
path-jrc-industry-production: build/data/jrc-idees/industry/processed-production.nc
outputs:
placeholder-out1:
placeholder-out2:
Expand Down
3 changes: 3 additions & 0 deletions modules/industry/env_industry.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@ dependencies:
- pandas=1.2.3
- pycountry=18.12.8
- snakemake-minimal=7.26.0
- netCDF4=1.5.6
- xarray=2022.9.0
- scipy=1.5.3
- pip:
- styleframe==4.2
- -e ./lib
4 changes: 2 additions & 2 deletions modules/industry/industry.smk
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ rule steel_industry:
path_energy_balances = config["inputs"]["path-energy-balances"],
path_cat_names = config["inputs"]["path-cat-names"],
path_carrier_names = config["inputs"]["path-carrier-names"],
path_jrc_energy = f"{DATA_PATH}/jrc_idees_processed_energy.csv.gz",
path_jrc_production = f"{DATA_PATH}/jrc_idees_processed_production.csv.gz",
path_jrc_industry_energy = config["inputs"]["path-jrc-industry-energy"],
path_jrc_industry_production = config["inputs"]["path-jrc-industry-production"],
output:
path_output = f"{BUILD_PATH}/annual_demand_steel.csv"
script: f"{SCRIPT_PATH}/steel_industry.py"
Expand Down
Binary file not shown.
Binary file not shown.
8 changes: 8 additions & 0 deletions modules/industry/schema.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ properties:
description: |
Carrier mapping file.
Columns [carrier_code,carrier_name,hh_carrier_name,com_carrier_name,ind_carrier_name,oth_carrier_name].
path-jrc-industry-energy:
type: string
description: |
Processed JRC-IDEES industry energy demand file in netCDF (.nc) format.
path-jrc-industry-production:
type: string
description: |
Processed JRC-IDEES industrial production file in netCDF (.nc) format.
outputs:
type: object
description: Outputs are paths for the files produced by the module.
Expand Down
35 changes: 23 additions & 12 deletions modules/industry/scripts/steel_industry.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,14 @@

import eurocalliopelib.utils as ec_utils
import pandas as pd
import xarray as xr
from utils import formatting
from utils import jrc_idees_parser as jrc

CAT_NAME_STEEL = "Iron and steel"

H2_LHV_KTOE = 2.863 # 0.0333 TWh/kt LHV -> 2.863ktoe/kt
HDRI_CONSUMPTION = 0.0116 # H-DRI: 135kWh_e/t = 0.0116ktoe/kt
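H2_LHV_KTOE = 0.0333 # H2 lower heating value: 0.0333 TWh/kt (value is in TWh/kt despite the legacy KTOE name)
HDRI_CONSUMPTION = 135e-6 # H-DRI electricity use: 135 kWh_e/t = 135e-6 TWh/kt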


def _get_h2_to_steel(recycled_steel_share: float) -> float:
Expand All @@ -23,8 +24,8 @@ def get_steel_demand_df(
path_energy_balances: str,
path_cat_names: str,
path_carrier_names: str,
path_jrc_energy: str,
path_jrc_production: str,
path_jrc_industry_energy: str,
path_jrc_industry_production: str,
path_output: Optional[str] = None,
) -> pd.DataFrame:
"""Execute the data processing pipeline for the "Iron and steel" sub-sector.
Expand All @@ -34,8 +35,8 @@ def get_steel_demand_df(
path_energy_balances (str): country energy balances (usually from eurostat).
path_cat_names (str): eurostat category mapping file.
path_carrier_names (str): eurostat carrier name mapping file.
path_jrc_energy (str): jrc country-specific industrial energy demand file.
path_jrc_production (str): jrc country-specific industrial production file.
path_jrc_industry_energy (str): processed JRC-IDEES country-specific industrial energy demand file (netCDF).
path_jrc_industry_production (str): processed JRC-IDEES country-specific industrial production file (netCDF).
path_output (str): location of steel demand output file.
Returns:
Expand All @@ -49,8 +50,18 @@ def get_steel_demand_df(
)
cat_names_df = pd.read_csv(path_cat_names, header=0, index_col=0)
carrier_names_df = pd.read_csv(path_carrier_names, header=0, index_col=0)
energy_df = pd.read_csv(path_jrc_energy, index_col=[0, 1, 2, 3, 4, 5, 6])
prod_df = pd.read_csv(path_jrc_production, index_col=[0, 1, 2, 3])

energy_df = xr.open_dataset(path_jrc_industry_energy).to_dataframe().unstack("year")
energy_df["unit"] = "twh"
energy_df = energy_df.set_index("unit", append=True)
energy_df.columns = energy_df.columns.droplevel()
prod_df = (
xr.open_dataset(path_jrc_industry_production).to_dataframe().unstack("year")
)
prod_df["unit"] = "twh"
prod_df = prod_df.set_index("unit", append=True)
prod_df.columns = prod_df.columns.droplevel()

# Ensure dataframes only have data specific to this industry
cat_names_df = cat_names_df[cat_names_df["jrc_idees"] == CAT_NAME_STEEL]
energy_df = energy_df.xs(CAT_NAME_STEEL, level="cat_name", drop_level=False)
Expand Down Expand Up @@ -211,7 +222,7 @@ def process_steel_energy_consumption(
.assign(carrier="space_heat")
.set_index("carrier", append=True)
.sum(level=total_specific_consumption.index.names)
.rename(index={"ktoe": "ktoe/kt"})
.rename(index={"twh": "twh/kt"})
)
total_specific_consumption = total_specific_consumption.append(
space_heat_specific_demand
Expand All @@ -220,7 +231,7 @@ def process_steel_energy_consumption(
steel_consumption = total_specific_consumption.mul(
jrc_prod_df.xs("Iron and steel", level="cat_name").sum(level="country_code"),
level="country_code",
).rename(index={"ktoe/kt": "ktoe"})
).rename(index={"twh/kt": "twh"})

return steel_consumption

Expand All @@ -232,7 +243,7 @@ def process_steel_energy_consumption(
path_energy_balances=snakemake.input.path_energy_balances,
path_cat_names=snakemake.input.path_cat_names,
path_carrier_names=snakemake.input.path_carrier_names,
path_jrc_energy=snakemake.input.path_jrc_energy,
path_jrc_production=snakemake.input.path_jrc_production,
path_jrc_industry_energy=snakemake.input.path_jrc_industry_energy,
path_jrc_industry_production=snakemake.input.path_jrc_industry_production,
path_output=snakemake.output.path_output,
)
2 changes: 1 addition & 1 deletion modules/industry/scripts/utils/formatting.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def fill_missing_data(
.sum(level="cat_code", min_count=1)
.stack("country")
.rename_axis(index=["cat_name", "country_code"])
.apply(ec_utils.tj_to_ktoe)
.apply(ec_utils.tj_to_twh)
)

country_codes = subsector_energy_consumption_df.index.get_level_values(
Expand Down
4 changes: 2 additions & 2 deletions modules/industry/scripts/utils/jrc_idees_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ def get_auxiliary_electricity_consumption(
)
specific_consumption = consumption.div(jrc_prod_df.loc[process].droplevel("unit"))
specific_consumption.index = specific_consumption.index.set_levels(
["ktoe/kt"], level="unit"
["twh/kt"], level="unit"
)
return specific_consumption.fillna(0)

Expand Down Expand Up @@ -45,7 +45,7 @@ def get_specific_electricity_consumption(
)

specific_consumption = specific_demand.div(electrical_efficiency).rename(
index={"ktoe": "ktoe/kt"}
index={"twh": "twh/kt"}
)
assert (
(
Expand Down

0 comments on commit 87dfea5

Please sign in to comment.