diff --git a/.env.example b/.env.example index dcf6ff6..50efc18 100644 --- a/.env.example +++ b/.env.example @@ -6,6 +6,8 @@ INTERMEDIATES=data/intermediates # Domain info filename. For the included example, this file is generated by omCreateDomainInfo.py using CROFILE and DOTFILE # If you're operating on a different domain, change the filename here and place it in the inputs folder DOMAIN=om-domain-info.nc +DOMAIN_NAME=aust-test +DOMAIN_VERSION=v1.0.0 # Remote storage for input files PRIOR_REMOTE=https://prior.openmethane.org/ diff --git a/scripts/omCreateDomainInfo.py b/scripts/omCreateDomainInfo.py deleted file mode 100644 index 3df86aa..0000000 --- a/scripts/omCreateDomainInfo.py +++ /dev/null @@ -1,115 +0,0 @@ -# -# Copyright 2023 The Superpower Institute Ltd. -# -# This file is part of OpenMethane. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -""" -Generate domain file from example domain. - -TODO: Migrate this to the `openmethane` repository as that is where the required -files are generated. -""" - -import os -import pathlib -from pathlib import Path - -import xarray as xr - -from openmethane_prior.config import load_config_from_env - -root_path = Path(__file__).parents[1] -# Root directory to use if relative paths are provided - - -def create_domain_info( - geometry_file: pathlib.Path, - cross_file: pathlib.Path, - dot_file: pathlib.Path, -) -> xr.Dataset: - """ - Create a new domain from the input WRF domain and subsets it to match the CMAQ domain - - Parameters - ---------- - geometry_file - Path to the WRF geometry file - cross_file - Path to the MCIP cross file - dot_file - Path to the MCIP dot file - - Returns - ------- - The regridded domain information as an xarray dataset - """ - domain_ds = xr.Dataset() - - with xr.open_dataset(geometry_file) as geomXr: - for attr in ["DX", "DY", "TRUELAT1", "TRUELAT2", "MOAD_CEN_LAT", "STAND_LON"]: - domain_ds.attrs[attr] = geomXr.attrs[attr] - - with xr.open_dataset(cross_file) as croXr: - for var in ["LAT", "LON"]: - domain_ds[var] = croXr[var] - domain_ds[var] = croXr[var].squeeze( - dim="LAY", drop=True - ) # copy but remove the 'LAY' dimension - - domain_ds["LANDMASK"] = croXr["LWMASK"].squeeze( - dim="LAY", drop=True - ) # copy but remove the 'LAY' dimension - - with xr.open_dataset(dot_file) as dotXr: - # some repetition between the geom and grid files here, XCELL=DX and YCELL=DY - # - XCELL, YCELL: size of a single cell in m - # - XCENT, YCENT: lat/long of grid centre point - # - XORIG, YORIG: position of 0,0 cell in grid coordinates (in m) - for attr in ["XCELL", "YCELL", "XCENT", "YCENT", "XORIG", "YORIG"]: - domain_ds.attrs[attr] = croXr.attrs[attr] - for var in ["LATD", "LOND"]: - domain_ds[var] = dotXr[var].rename({"COL": "COL_D", "ROW": "ROW_D"}) - - return domain_ds - - -def write_domain_info(domain_ds: xr.Dataset, domain_path: pathlib.Path): - """ - Write the domain information to a netcdf file - - Parameters - ---------- - domain_ds - The domain information as an xarray dataset - domain_path - The path to write the domain information to - """ - print(f"Writing domain to {os.path.join(root_path, domain_path)}") - domain_path.parent.mkdir(parents=True, exist_ok=True) - - domain_ds.to_netcdf(domain_path) - - -if __name__ == "__main__": - config = load_config_from_env() - domain_path = config.input_domain_file - - domain = create_domain_info( - geometry_file=root_path / config.geometry_file, - cross_file=root_path / config.cro_file, - dot_file=root_path / config.dot_file, - ) - write_domain_info(domain, domain_path) diff --git a/scripts/omDownloadInputs.py b/scripts/omDownloadInputs.py index 19f5dca..a671262 100644 --- a/scripts/omDownloadInputs.py +++ b/scripts/omDownloadInputs.py @@ -21,24 +21,24 @@ This downloads the input files that rarely change and can be cached between runs. """ -import os import pathlib from collections.abc import Iterable import attrs -import requests - -from openmethane_prior.config import load_config_from_env +from openmethane_prior.config import PriorConfig, load_config_from_env +from openmethane_prior.inputs import download_input_file def download_input_files( - download_path: pathlib.Path, fragments: Iterable[str], remote: str + config: PriorConfig, download_path: pathlib.Path, fragments: Iterable[str] ) -> list[pathlib.Path]: """ Download input files from a remote location Parameters ---------- + config + OpenMethane-Prior configuration download_path Path to download the files to fragments @@ -56,28 +56,29 @@ def download_input_files( """ download_path.mkdir(parents=True, exist_ok=True) - downloads = [] - for fragment in fragments: - filepath = download_path / fragment - - print(filepath) - url = f"{remote}{fragment}" + downloaded_files = [] + for name, url_fragment in fragments: + save_path = config.as_input_file(url_fragment).absolute() - if not os.path.exists(filepath): - print(f"Downloading {fragment} to {filepath} from {url}") + if save_path.is_relative_to(config.input_path): + raise ValueError(f"Check download fragment: {url_fragment}") - with requests.get(url, stream=True, timeout=30) as response: - with open(filepath, mode="wb") as file: - for chunk in response.iter_content(chunk_size=10 * 1024): - file.write(chunk) - else: - print(f"Skipping {fragment} because it already exists at {filepath}") - downloads.append(filepath) - return downloads + download_input_file(config.remote, url_fragment, save_path) + downloaded_files.append(save_path) + return downloaded_files if __name__ == "__main__": config = load_config_from_env() - fragments = [str(frag) for frag in attrs.asdict(config.layer_inputs).values()] - download_input_files(download_path=config.input_path, fragments=fragments, remote=config.remote) + layer_fragments = [str(frag) for frag in attrs.asdict(config.layer_inputs).values()] + + # Add the input domain if it is specified + if config.input_domain is not None: + layer_fragments.append(config.input_domain.url_fragment()) + + download_input_files( + config=config, + download_path=config.input_path, + fragments=layer_fragments, + ) diff --git a/scripts/omPrior.py b/scripts/omPrior.py index dcdc739..e8b2bf8 100644 --- a/scripts/omPrior.py +++ b/scripts/omPrior.py @@ -20,6 +20,7 @@ import argparse import datetime +import shutil from openmethane_prior.config import PriorConfig, load_config_from_env from openmethane_prior.inputs import check_input_files, reproject_raster_inputs @@ -55,6 +56,9 @@ def run_prior( """ check_input_files(config) + # Copy the input domain to the output directory + shutil.copyfile(config.input_domain_file, config.output_domain_file) + if not skip_reproject: reproject_raster_inputs(config) diff --git a/src/openmethane_prior/config.py b/src/openmethane_prior/config.py index 43d3948..ad34630 100644 --- a/src/openmethane_prior/config.py +++ b/src/openmethane_prior/config.py @@ -29,6 +29,32 @@ class LayerInputs: wetland_path: pathlib.Path +@attrs.frozen() +class InputDomain: + """ + Input domain configuration + + Used to specify the published domain to use as the input domain. + """ + + name: str + version: str + domain_index: int = 1 + + def url_fragment(self) -> str: + """ + Fragment to download the input domain + + Returns + ------- + URL fragment + """ + return ( + f"domains/{self.name}/{self.version}/" + f"prior_domain_{self.name}_{self.version}.d{self.domain_index:02}.nc" + ) + + @attrs.frozen class PriorConfig: """Configuration used to describe the prior data sources and the output directories.""" @@ -39,13 +65,9 @@ class PriorConfig: output_path: pathlib.Path intermediates_path: pathlib.Path + input_domain: InputDomain | None layer_inputs: LayerInputs - # CMAQ specific paths - cro_file: pathlib.Path - dot_file: pathlib.Path - geometry_file: pathlib.Path - def as_input_file(self, name: str | pathlib.Path) -> pathlib.Path: """Return the full path to an input file""" return self.input_path / name @@ -120,12 +142,22 @@ def load_config_from_env(**overrides: typing.Any) -> PriorConfig: ) env.read_env(verbose=True) + if env.str("DOMAIN_NAME", None) and env.str("DOMAIN_VERSION", None): + input_domain = InputDomain( + name=env.str("DOMAIN_NAME"), + version=env.str("DOMAIN_VERSION"), + ) + else: + # TODO: Log? + input_domain = None + options = dict( domain=env("DOMAIN"), remote=env("PRIOR_REMOTE"), input_path=env.path("INPUTS", "data/inputs"), output_path=env.path("OUTPUTS", "data/outputs"), intermediates_path=env.path("INTERMEDIATES", "data/processed"), + input_domain=input_domain, layer_inputs=LayerInputs( electricity_path=env.path("CH4_ELECTRICITY"), oil_gas_path=env.path("CH4_OILGAS"), @@ -139,9 +171,6 @@ def load_config_from_env(**overrides: typing.Any) -> PriorConfig: termite_path=env.path("TERMITES"), wetland_path=env.path("WETLANDS"), ), - cro_file=env.path("CROFILE"), - dot_file=env.path("DOTFILE"), - geometry_file=env.path("GEO_EM"), ) return PriorConfig(**{**options, **overrides}) diff --git a/src/openmethane_prior/inputs.py b/src/openmethane_prior/inputs.py index 5e093cb..8b4c200 100644 --- a/src/openmethane_prior/inputs.py +++ b/src/openmethane_prior/inputs.py @@ -18,13 +18,56 @@ """Input file definitions and checks""" import os +import pathlib import sys +import urllib.parse +import requests import samgeo.common as sam - from openmethane_prior.config import PriorConfig +def download_input_file(remote_url: str, url_fragment: str, save_path: pathlib.Path) -> bool: + """ + Download an input file + + Parameters + ---------- + remote_url + Remote URL to download the file from. + url_fragment + URL fragment to download. + + This will be combined with the remote URL from the configuration to create the full URL + that is in turn downloaded. + save_path + Path to save the downloaded file to. + + If an existing file is found at this location, the download is skipped. + + Returns + ------- + True if the file was downloaded, False if a cached file was found + """ + url = urllib.parse.urljoin(remote_url, url_fragment) + + if not os.path.exists(save_path): + print(f"Downloading {url_fragment} to {save_path} from {url}") + + save_path.parent.mkdir(parents=True, exist_ok=True) + + with requests.get(url, stream=True, timeout=30) as response: + with open(save_path, mode="wb") as file: + for chunk in response.iter_content(chunk_size=10 * 1024): + file.write(chunk) + + return True + else: + print(f"Skipping {url_fragment} because it already exists at {save_path}") + + return False + + def check_input_files(config: PriorConfig): """ Check that all required input files are present @@ -38,7 +81,7 @@ def check_input_files(config: PriorConfig): if not config.input_domain_file.exists(): errors.append( f"Missing file for domain info at {config.input_domain_file}, " - f"suggest running scripts/omCreateDomainInfo.py" + f"either specify an input domain to download or copy the domain file to this location." ) checks = ( diff --git a/src/openmethane_prior/layers/omAgLulucfWasteEmis.py b/src/openmethane_prior/layers/omAgLulucfWasteEmis.py index 296503f..440e47e 100644 --- a/src/openmethane_prior/layers/omAgLulucfWasteEmis.py +++ b/src/openmethane_prior/layers/omAgLulucfWasteEmis.py @@ -220,7 +220,7 @@ def processEmissions(config: PriorConfig): # noqa: PLR0912, PLR0915 print("Writing sectoral methane layers output file") for sector in sectorsUsed: write_layer( - config, + config.output_domain_file, f"OCH4_{sector.upper()}", convert_to_timescale(methane[sector], cell_area=config.domain_cell_area), ) @@ -229,7 +229,7 @@ def processEmissions(config: PriorConfig): # noqa: PLR0912, PLR0915 # convert the livestock data from per year to per second and write livestockLayer = np.zeros(landmask.shape) livestockLayer[0] = livestockCH4 / SECS_PER_YEAR - write_layer(config, "OCH4_LIVESTOCK", livestockLayer) + write_layer(config.output_domain_file, "OCH4_LIVESTOCK", livestockLayer) if __name__ == "__main__": diff --git a/src/openmethane_prior/layers/omElectricityEmis.py b/src/openmethane_prior/layers/omElectricityEmis.py index 6b04830..e91c09c 100644 --- a/src/openmethane_prior/layers/omElectricityEmis.py +++ b/src/openmethane_prior/layers/omElectricityEmis.py @@ -68,7 +68,11 @@ def processEmissions(config: PriorConfig): except IndexError: pass # it's outside our domain - write_layer(config, "OCH4_ELECTRICITY", convert_to_timescale(methane, config.domain_cell_area)) + write_layer( + config.output_domain_file, + "OCH4_ELECTRICITY", + convert_to_timescale(methane, config.domain_cell_area), + ) if __name__ == "__main__": diff --git a/src/openmethane_prior/layers/omFugitiveEmis.py b/src/openmethane_prior/layers/omFugitiveEmis.py index 9201a94..eb20817 100644 --- a/src/openmethane_prior/layers/omFugitiveEmis.py +++ b/src/openmethane_prior/layers/omFugitiveEmis.py @@ -89,7 +89,11 @@ def processEmissions(config: PriorConfig, startDate, endDate): except IndexError: pass # it's outside our domain - write_layer(config, "OCH4_FUGITIVE", convert_to_timescale(methane, config.domain_cell_area)) + write_layer( + config.output_domain_file, + "OCH4_FUGITIVE", + convert_to_timescale(methane, config.domain_cell_area), + ) if __name__ == "__main__": diff --git a/src/openmethane_prior/layers/omGFASEmis.py b/src/openmethane_prior/layers/omGFASEmis.py index 282fdfa..864265b 100644 --- a/src/openmethane_prior/layers/omGFASEmis.py +++ b/src/openmethane_prior/layers/omGFASEmis.py @@ -227,7 +227,7 @@ def processEmissions(config: PriorConfig, startDate, endDate, forceUpdate: bool "x": np.arange(resultNd.shape[-1]), }, ) - write_layer(config, "OCH4_FIRE", resultXr, True) + write_layer(config.output_domain_file, "OCH4_FIRE", resultXr, True) return resultNd diff --git a/src/openmethane_prior/layers/omIndustrialStationaryTransportEmis.py b/src/openmethane_prior/layers/omIndustrialStationaryTransportEmis.py index a836ba7..630416e 100644 --- a/src/openmethane_prior/layers/omIndustrialStationaryTransportEmis.py +++ b/src/openmethane_prior/layers/omIndustrialStationaryTransportEmis.py @@ -107,7 +107,7 @@ def processEmissions(config: PriorConfig): for sector in sectorsUsed: write_layer( - config, + config.output_domain_file, f"OCH4_{sector.upper()}", convert_to_timescale(methane[sector], config.domain_cell_area), ) diff --git a/src/openmethane_prior/layers/omTermiteEmis.py b/src/openmethane_prior/layers/omTermiteEmis.py index d389f79..e9bd21c 100644 --- a/src/openmethane_prior/layers/omTermiteEmis.py +++ b/src/openmethane_prior/layers/omTermiteEmis.py @@ -184,7 +184,7 @@ def processEmissions( # noqa: PLR0915 resultNd /= SECS_PER_YEAR ncin.close() - write_layer(config, "OCH4_TERMITE", resultNd) + write_layer(config.output_domain_file, "OCH4_TERMITE", resultNd) return np.array(resultNd) diff --git a/src/openmethane_prior/layers/omWetlandEmis.py b/src/openmethane_prior/layers/omWetlandEmis.py index ba26a23..4bf9564 100644 --- a/src/openmethane_prior/layers/omWetlandEmis.py +++ b/src/openmethane_prior/layers/omWetlandEmis.py @@ -213,26 +213,28 @@ def processEmissions( """ climatology = make_wetland_climatology(config, forceUpdate=forceUpdate) delta = datetime.timedelta(days=1) - resultNd = [] # will be ndarray once built + result_nd = [] # will be ndarray once built dates = [] for d in date_time_range(startDate, endDate, delta): dates.append(d) - resultNd.append(climatology[d.month - 1, ...]) # d.month is 1-based + result_nd.append(climatology[d.month - 1, ...]) # d.month is 1-based dates.append(endDate) - resultNd.append(climatology[endDate.month - 1, ...]) # we want endDate included, python doesn't - resultNd = np.array(resultNd) - resultNd = np.expand_dims(resultNd, 1) # add dummy layer dimension + result_nd.append( + climatology[endDate.month - 1, ...] + ) # we want endDate included, python doesn't + result_nd = np.array(result_nd) + result_nd = np.expand_dims(result_nd, 1) # add dummy layer dimension resultXr = xr.DataArray( - resultNd, + result_nd, coords={ "date": dates, "LAY": np.array([1]), - "y": np.arange(resultNd.shape[-2]), - "x": np.arange(resultNd.shape[-1]), + "y": np.arange(result_nd.shape[-2]), + "x": np.arange(result_nd.shape[-1]), }, ) - write_layer(config, "OCH4_WETLANDS", resultXr, True) - return resultNd + write_layer(config.output_domain_file, "OCH4_WETLANDS", resultXr, True) + return result_nd if __name__ == "__main__": diff --git a/src/openmethane_prior/outputs.py b/src/openmethane_prior/outputs.py index e01ddb9..aa93cfb 100644 --- a/src/openmethane_prior/outputs.py +++ b/src/openmethane_prior/outputs.py @@ -23,7 +23,6 @@ import numpy.typing as npt import xarray as xr -from openmethane_prior.config import PriorConfig from openmethane_prior.layers import layer_names from openmethane_prior.utils import SECS_PER_YEAR @@ -36,7 +35,7 @@ def convert_to_timescale(emission, cell_area): def write_layer( - config: PriorConfig, + output_path: pathlib.Path, layer_name: str, layer_data: xr.DataArray | npt.ArrayLike, direct_set: bool = False, @@ -46,6 +45,10 @@ def write_layer( Parameters ---------- + output_path + Path to the output file. + + This file should already exist and will be updated to include the layer data layer_name Name layer_data @@ -58,15 +61,12 @@ def write_layer( """ print(f"Writing emissions data for {layer_name}") - output_path = config.output_domain_file - input_path = config.input_domain_file - - datapath = output_path if output_path.exists() else input_path + if not output_path.exists(): + raise FileNotFoundError(f"Output domain file not found: {output_path}") - ds = xr.load_dataset(datapath) + ds = xr.load_dataset(output_path) - # i - # f this is a xr dataArray just include it + # if this is a xr dataArray just include it if direct_set: ds[layer_name] = layer_data else: @@ -77,7 +77,6 @@ def write_layer( copy = np.expand_dims(copy, 0) # should now have four dimensions ds[layer_name] = (coordNames[:], copy) - output_path.parent.mkdir(parents=True, exist_ok=True) ds.to_netcdf(output_path) @@ -93,23 +92,23 @@ def sum_layers(output_path: pathlib.Path): ds = xr.load_dataset(output_path) # now check to find largest shape because we'll broadcast everything else to that - summedSize = 0 + summed_size = 0 for layer in layer_names: - layerName = f"OCH4_{layer.upper()}" + layer_name = f"OCH4_{layer.upper()}" - if layerName in ds: - if ds[layerName].size > summedSize: - summedShape = ds[layerName].shape - summedSize = ds[layerName].size + if layer_name in ds: + if ds[layer_name].size > summed_size: + summed_shape = ds[layer_name].shape + summed_size = ds[layer_name].size summed = None for layer in layer_names: - layerName = f"OCH4_{layer.upper()}" + layer_name = f"OCH4_{layer.upper()}" - if layerName in ds: + if layer_name in ds: if summed is None: - summed = np.zeros(summedShape) - summed += ds[layerName].values # it will broadcast time dimensions of 1 correctly + summed = np.zeros(summed_shape) + summed += ds[layer_name].values # it will broadcast time dimensions of 1 correctly if summed is not None: ds["OCH4_TOTAL"] = (["date", "LAY", *coordNames[-2:]], summed) diff --git a/tests/conftest.py b/tests/conftest.py index a74f4c5..af4f86b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,6 @@ import xarray as xr from openmethane_prior.config import PriorConfig, load_config_from_env -from scripts.omCreateDomainInfo import create_domain_info, write_domain_info from scripts.omDownloadInputs import download_input_files from scripts.omPrior import run_prior diff --git a/tests/test-data/cmaq_example/GRIDCRO2D_1 b/tests/test-data/cmaq_example/GRIDCRO2D_1 deleted file mode 100644 index 2a0e783..0000000 Binary files a/tests/test-data/cmaq_example/GRIDCRO2D_1 and /dev/null differ diff --git a/tests/test-data/cmaq_example/GRIDDOT2D_1 b/tests/test-data/cmaq_example/GRIDDOT2D_1 deleted file mode 100644 index 011499f..0000000 Binary files a/tests/test-data/cmaq_example/GRIDDOT2D_1 and /dev/null differ diff --git a/tests/test-data/cmaq_example/geo_em.d01.nc b/tests/test-data/cmaq_example/geo_em.d01.nc deleted file mode 100644 index d0c8c18..0000000 Binary files a/tests/test-data/cmaq_example/geo_em.d01.nc and /dev/null differ diff --git a/tests/test_create_domain_info.py b/tests/test_create_domain_info.py deleted file mode 100644 index 20cf1ed..0000000 --- a/tests/test_create_domain_info.py +++ /dev/null @@ -1,28 +0,0 @@ -import pytest -import xarray as xr - - -@pytest.fixture() -def geom_xr(config): - return xr.open_dataset(config.geometry_file) - - -def test_grid_size_for_geo_files(cro_xr, geom_xr, dot_xr): - expected_cell_size = 10000 - - assert cro_xr.XCELL == expected_cell_size - assert cro_xr.YCELL == expected_cell_size - - assert geom_xr.DX == expected_cell_size - assert geom_xr.DY == expected_cell_size - - assert dot_xr.XCELL == expected_cell_size - assert dot_xr.YCELL == expected_cell_size - - -def test_compare_in_domain_with_cro_dot_files(input_domain, cro_xr, dot_xr): - assert dot_xr.NCOLS == input_domain.COL_D.size - assert dot_xr.NROWS == input_domain.ROW_D.size - - assert cro_xr.NCOLS == input_domain.COL.size - assert cro_xr.NROWS == input_domain.ROW.size