From 643de7eadd28b28c24cf5c755656f3d1e8632679 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 18 Oct 2024 09:14:39 +0200 Subject: [PATCH] Run pre-commit --- esmvalcore/cmor/_fixes/emac/emac.py | 255 ++++---- esmvalcore/cmor/_fixes/icon/_base_fixes.py | 103 ++-- esmvalcore/cmor/_fixes/icon/icon.py | 179 +++--- esmvalcore/cmor/_fixes/icon/icon_seamless.py | 10 +- esmvalcore/cmor/check.py | 397 +++++++----- esmvalcore/local.py | 282 +++++---- tests/unit/cmor/test_cmor_check.py | 617 ++++++++++--------- 7 files changed, 1007 insertions(+), 836 deletions(-) diff --git a/esmvalcore/cmor/_fixes/emac/emac.py b/esmvalcore/cmor/_fixes/emac/emac.py index 908f7ce3c1..8ee386c839 100644 --- a/esmvalcore/cmor/_fixes/emac/emac.py +++ b/esmvalcore/cmor/_fixes/emac/emac.py @@ -34,7 +34,7 @@ class AllVars(EmacFix): # Dictionary to map invalid units in the data to valid entries INVALID_UNITS = { - 'kg/m**2s': 'kg m-2 s-1', + "kg/m**2s": "kg m-2 s-1", } def fix_file(self, filepath, output_dir, add_unique_suffix=False): @@ -49,17 +49,17 @@ def fix_file(self, filepath, output_dir, add_unique_suffix=False): in the class:`iris.cube.CubeList` object returned by :mod:`iris.load`. """ - if 'alevel' not in self.vardef.dimensions: + if "alevel" not in self.vardef.dimensions: return filepath new_path = self.get_fixed_filepath( output_dir, filepath, add_unique_suffix=add_unique_suffix ) copyfile(filepath, new_path) - with Dataset(new_path, mode='a') as dataset: - if 'formula_terms' in dataset.variables['lev'].ncattrs(): - del dataset.variables['lev'].formula_terms - if 'formula_terms' in dataset.variables['ilev'].ncattrs(): - del dataset.variables['ilev'].formula_terms + with Dataset(new_path, mode="a") as dataset: + if "formula_terms" in dataset.variables["lev"].ncattrs(): + del dataset.variables["lev"].formula_terms + if "formula_terms" in dataset.variables["ilev"].ncattrs(): + del dataset.variables["ilev"].formula_terms return new_path def fix_metadata(self, cubes): @@ -72,11 +72,11 @@ def fix_metadata(self, cubes): self.fix_regular_lon(cube) # Fix regular pressure levels (considers plev19, plev39, etc.) 
- if self.vardef.has_coord_with_standard_name('air_pressure'): + if self.vardef.has_coord_with_standard_name("air_pressure"): self._fix_plev(cube) # Fix hybrid pressure levels - if 'alevel' in self.vardef.dimensions: + if "alevel" in self.vardef.dimensions: cube = self._fix_alevel(cube, cubes) # Fix scalar coordinates @@ -92,9 +92,9 @@ def _fix_plev(self, cube): for coord in cube.coords(): coord_type = iris.util.guess_coord_axis(coord) - if coord_type != 'Z': + if coord_type != "Z": continue - if not coord.units.is_convertible('Pa'): + if not coord.units.is_convertible("Pa"): continue self.fix_plev_metadata(cube, coord) @@ -104,59 +104,60 @@ def _fix_plev(self, cube): raise ValueError( f"Cannot find requested pressure level coordinate for variable " f"'{self.vardef.short_name}', searched for Z-coordinates with " - f"units that are convertible to Pa") + f"units that are convertible to Pa" + ) @staticmethod def _fix_alevel(cube, cubes): """Fix hybrid pressure level coordinate of cube.""" # Add coefficients for hybrid pressure level coordinate coords_to_add = { - 'hyam': 1, - 'hybm': 1, - 'aps_ave': (0, 2, 3), + "hyam": 1, + "hybm": 1, + "aps_ave": (0, 2, 3), } add_aux_coords_from_cubes(cube, cubes, coords_to_add) # Reverse entire cube along Z-axis so that index 0 is surface level # Note: This would automatically be fixed by the CMOR checker, but this # fails to fix the bounds of ap and b - cube = iris.util.reverse(cube, cube.coord(var_name='lev')) + cube = iris.util.reverse(cube, cube.coord(var_name="lev")) # Adapt metadata of coordinates - lev_coord = cube.coord(var_name='lev') - ap_coord = cube.coord(var_name='hyam') - b_coord = cube.coord(var_name='hybm') - ps_coord = cube.coord(var_name='aps_ave') - - lev_coord.var_name = 'lev' - lev_coord.standard_name = 'atmosphere_hybrid_sigma_pressure_coordinate' - lev_coord.long_name = 'hybrid sigma pressure coordinate' - lev_coord.units = '1' - lev_coord.attributes['positive'] = 'down' - - ap_coord.var_name = 'ap' + lev_coord = cube.coord(var_name="lev") + ap_coord = cube.coord(var_name="hyam") + b_coord = cube.coord(var_name="hybm") + ps_coord = cube.coord(var_name="aps_ave") + + lev_coord.var_name = "lev" + lev_coord.standard_name = "atmosphere_hybrid_sigma_pressure_coordinate" + lev_coord.long_name = "hybrid sigma pressure coordinate" + lev_coord.units = "1" + lev_coord.attributes["positive"] = "down" + + ap_coord.var_name = "ap" ap_coord.standard_name = None - ap_coord.long_name = 'vertical coordinate formula term: ap(k)' + ap_coord.long_name = "vertical coordinate formula term: ap(k)" ap_coord.attributes = {} - b_coord.var_name = 'b' + b_coord.var_name = "b" b_coord.standard_name = None - b_coord.long_name = 'vertical coordinate formula term: b(k)' + b_coord.long_name = "vertical coordinate formula term: b(k)" b_coord.attributes = {} - ps_coord.var_name = 'ps' - ps_coord.standard_name = 'surface_air_pressure' - ps_coord.long_name = 'Surface Air Pressure' + ps_coord.var_name = "ps" + ps_coord.standard_name = "surface_air_pressure" + ps_coord.long_name = "Surface Air Pressure" ps_coord.attributes = {} # Add bounds for coefficients # (make sure to reverse cubes beforehand so index 0 is surface level) ap_bnds_cube = iris.util.reverse( - cubes.extract_cube(NameConstraint(var_name='hyai')), + cubes.extract_cube(NameConstraint(var_name="hyai")), 0, ) b_bnds_cube = iris.util.reverse( - cubes.extract_cube(NameConstraint(var_name='hybi')), + cubes.extract_cube(NameConstraint(var_name="hybi")), 0, ) ap_bounds = da.stack( @@ -173,17 +174,21 @@ def 
_fix_alevel(cube, cubes): # Convert arrays to float64 for coord in (ap_coord, b_coord, ps_coord): coord.points = coord.core_points().astype( - float, casting='same_kind') + float, casting="same_kind" + ) if coord.has_bounds(): coord.bounds = coord.core_bounds().astype( - float, casting='same_kind') + float, casting="same_kind" + ) # Fix values of lev coordinate # Note: lev = a + b with a = ap / p0 (p0 = 100000 Pa) - lev_coord.points = (ap_coord.core_points() / 100000.0 + - b_coord.core_points()) - lev_coord.bounds = (ap_coord.core_bounds() / 100000.0 + - b_coord.core_bounds()) + lev_coord.points = ( + ap_coord.core_points() / 100000.0 + b_coord.core_points() + ) + lev_coord.bounds = ( + ap_coord.core_bounds() / 100000.0 + b_coord.core_bounds() + ) # Add HybridPressureFactory pressure_coord_factory = HybridPressureFactory( @@ -201,12 +206,9 @@ class Clwvi(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name=['xlvi_cav', 'xlvi_ave', - 'xlvi']) + - self.get_cube(cubes, var_name=['xivi_cav', 'xivi_ave', - 'xivi']) - ) + cube = self.get_cube( + cubes, var_name=["xlvi_cav", "xlvi_ave", "xlvi"] + ) + self.get_cube(cubes, var_name=["xivi_cav", "xivi_ave", "xivi"]) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -217,23 +219,22 @@ class Prodlnox(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" noxcg_cube = self.get_cube( - cubes, var_name=['NOxcg_cav', 'NOxcg_ave', 'NOxcg'] + cubes, var_name=["NOxcg_cav", "NOxcg_ave", "NOxcg"] ) noxic_cube = self.get_cube( - cubes, var_name=['NOxic_cav', 'NOxic_ave', 'NOxic'] + cubes, var_name=["NOxic_cav", "NOxic_ave", "NOxic"] ) - dt_cube = self.get_cube(cubes, var_name='dt') + dt_cube = self.get_cube(cubes, var_name="dt") - cube = (( + cube = ( noxcg_cube.collapsed( - ['longitude', 'latitude'], iris.analysis.SUM, weights=None - ) + - noxic_cube.collapsed( - ['longitude', 'latitude'], iris.analysis.SUM, weights=None - )) / - dt_cube - ) - cube.units = 'kg s-1' + ["longitude", "latitude"], iris.analysis.SUM, weights=None + ) + + noxic_cube.collapsed( + ["longitude", "latitude"], iris.analysis.SUM, weights=None + ) + ) / dt_cube + cube.units = "kg s-1" cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -255,7 +256,7 @@ def fix_metadata(self, cubes): """Fix metadata.""" cubes = super().fix_metadata(cubes) cube = self.get_cube(cubes) - z_coord = cube.coord(axis='Z') + z_coord = cube.coord(axis="Z") cube = cube.collapsed(z_coord, iris.analysis.SUM) return CubeList([cube]) @@ -265,12 +266,9 @@ class Pr(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name=['aprl_cav', 'aprl_ave', - 'aprl']) + - self.get_cube(cubes, var_name=['aprc_cav', 'aprc_ave', - 'aprc']) - ) + cube = self.get_cube( + cubes, var_name=["aprl_cav", "aprl_ave", "aprl"] + ) + self.get_cube(cubes, var_name=["aprc_cav", "aprc_ave", "aprc"]) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -280,11 +278,10 @@ class Rlds(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name=['flxtbot_cav', 'flxtbot_ave', - 'flxsbot']) - - self.get_cube(cubes, var_name=['tradsu_cav', 'tradsu_ave', - 'tradsu']) + cube = self.get_cube( + cubes, var_name=["flxtbot_cav", "flxtbot_ave", "flxsbot"] + ) - self.get_cube( + cubes, var_name=["tradsu_cav", "tradsu_ave", "tradsu"] ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -304,11 +301,10 @@ class Rsds(EmacFix): def fix_metadata(self, cubes): """Fix 
metadata.""" - cube = ( - self.get_cube(cubes, var_name=['flxsbot_cav', 'flxsbot_ave', - 'flxsbot']) - - self.get_cube(cubes, var_name=['sradsu_cav', 'sradsu_ave', - 'sradsu']) + cube = self.get_cube( + cubes, var_name=["flxsbot_cav", "flxsbot_ave", "flxsbot"] + ) - self.get_cube( + cubes, var_name=["sradsu_cav", "sradsu_ave", "sradsu"] ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -319,11 +315,10 @@ class Rsdt(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name=['flxstop_cav', 'flxstop_ave', - 'flxstop']) - - self.get_cube(cubes, var_name=['srad0u_cav', 'srad0u_ave', - 'srad0u']) + cube = self.get_cube( + cubes, var_name=["flxstop_cav", "flxstop_ave", "flxstop"] + ) - self.get_cube( + cubes, var_name=["srad0u_cav", "srad0u_ave", "srad0u"] ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -343,11 +338,10 @@ class Rtmt(EmacFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name=['flxttop_cav', 'flxttop_ave', - 'flxttop']) + - self.get_cube(cubes, var_name=['flxstop_cav', 'flxstop_ave', - 'flxstop']) + cube = self.get_cube( + cubes, var_name=["flxttop_cav", "flxttop_ave", "flxttop"] + ) + self.get_cube( + cubes, var_name=["flxstop_cav", "flxstop_ave", "flxstop"] ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -371,7 +365,7 @@ def fix_metadata(self, cubes): # Note: 1 mm = 100 DU cube = self.get_cube(cubes) cube.data = cube.core_data() / 100.0 - cube.units = 'mm' + cube.units = "mm" return CubeList([cube]) @@ -385,8 +379,8 @@ def fix_metadata(self, cubes): Z = Phi / g0 (g0 is standard acceleration of gravity). """ - g0_value = constants.value('standard acceleration of gravity') - g0_units = constants.unit('standard acceleration of gravity') + g0_value = constants.value("standard acceleration of gravity") + g0_units = constants.unit("standard acceleration of gravity") cube = self.get_cube(cubes) cube.data = cube.core_data() / g0_value @@ -404,14 +398,18 @@ class MP_BC_tot(EmacFix): # noqa: N801 def fix_metadata(self, cubes): """Fix metadata.""" cube = ( - self.get_cube(cubes, var_name=['MP_BC_ki_cav', 'MP_BC_ki_ave', - 'MP_BC_ki']) + - self.get_cube(cubes, var_name=['MP_BC_ks_cav', 'MP_BC_ks_ave', - 'MP_BC_ks']) + - self.get_cube(cubes, var_name=['MP_BC_as_cav', 'MP_BC_as_ave', - 'MP_BC_as']) + - self.get_cube(cubes, var_name=['MP_BC_cs_cav', 'MP_BC_cs_ave', - 'MP_BC_cs']) + self.get_cube( + cubes, var_name=["MP_BC_ki_cav", "MP_BC_ki_ave", "MP_BC_ki"] + ) + + self.get_cube( + cubes, var_name=["MP_BC_ks_cav", "MP_BC_ks_ave", "MP_BC_ks"] + ) + + self.get_cube( + cubes, var_name=["MP_BC_as_cav", "MP_BC_as_ave", "MP_BC_as"] + ) + + self.get_cube( + cubes, var_name=["MP_BC_cs_cav", "MP_BC_cs_ave", "MP_BC_cs"] + ) ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -423,14 +421,18 @@ class MP_DU_tot(EmacFix): # noqa: N801 def fix_metadata(self, cubes): """Fix metadata.""" cube = ( - self.get_cube(cubes, var_name=['MP_DU_ai_cav', 'MP_DU_ai_ave', - 'MP_DU_ai']) + - self.get_cube(cubes, var_name=['MP_DU_as_cav', 'MP_DU_as_ave', - 'MP_DU_as']) + - self.get_cube(cubes, var_name=['MP_DU_ci_cav', 'MP_DU_ci_ave', - 'MP_DU_ci']) + - self.get_cube(cubes, var_name=['MP_DU_cs_cav', 'MP_DU_cs_ave', - 'MP_DU_cs']) + self.get_cube( + cubes, var_name=["MP_DU_ai_cav", "MP_DU_ai_ave", "MP_DU_ai"] + ) + + self.get_cube( + cubes, var_name=["MP_DU_as_cav", "MP_DU_as_ave", "MP_DU_as"] + ) + + self.get_cube( + cubes, var_name=["MP_DU_ci_cav", 
"MP_DU_ci_ave", "MP_DU_ci"] + ) + + self.get_cube( + cubes, var_name=["MP_DU_cs_cav", "MP_DU_cs_ave", "MP_DU_cs"] + ) ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -443,17 +445,21 @@ def fix_metadata(self, cubes): """Fix metadata.""" cube = ( self.get_cube( - cubes, var_name=['MP_SO4mm_ns_cav', 'MP_SO4mm_ns_ave', - 'MP_SO4mm_ns']) + - self.get_cube( - cubes, var_name=['MP_SO4mm_ks_cav', 'MP_SO4mm_ks_ave', - 'MP_SO4mm_ks']) + - self.get_cube( - cubes, var_name=['MP_SO4mm_as_cav', 'MP_SO4mm_as_ave', - 'MP_SO4mm_as']) + - self.get_cube( - cubes, var_name=['MP_SO4mm_cs_cav', 'MP_SO4mm_cs_ave', - 'MP_SO4mm_cs']) + cubes, + var_name=["MP_SO4mm_ns_cav", "MP_SO4mm_ns_ave", "MP_SO4mm_ns"], + ) + + self.get_cube( + cubes, + var_name=["MP_SO4mm_ks_cav", "MP_SO4mm_ks_ave", "MP_SO4mm_ks"], + ) + + self.get_cube( + cubes, + var_name=["MP_SO4mm_as_cav", "MP_SO4mm_as_ave", "MP_SO4mm_as"], + ) + + self.get_cube( + cubes, + var_name=["MP_SO4mm_cs_cav", "MP_SO4mm_cs_ave", "MP_SO4mm_cs"], + ) ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -465,12 +471,15 @@ class MP_SS_tot(EmacFix): # noqa: N801 def fix_metadata(self, cubes): """Fix metadata.""" cube = ( - self.get_cube(cubes, var_name=['MP_SS_ks_cav', 'MP_SS_ks_ave', - 'MP_SS_ks']) + - self.get_cube(cubes, var_name=['MP_SS_as_cav', 'MP_SS_as_ave', - 'MP_SS_as']) + - self.get_cube(cubes, var_name=['MP_SS_cs_cav', 'MP_SS_cs_ave', - 'MP_SS_cs']) + self.get_cube( + cubes, var_name=["MP_SS_ks_cav", "MP_SS_ks_ave", "MP_SS_ks"] + ) + + self.get_cube( + cubes, var_name=["MP_SS_as_cav", "MP_SS_as_ave", "MP_SS_as"] + ) + + self.get_cube( + cubes, var_name=["MP_SS_cs_cav", "MP_SS_cs_ave", "MP_SS_cs"] + ) ) cube.var_name = self.vardef.short_name return CubeList([cube]) diff --git a/esmvalcore/cmor/_fixes/icon/_base_fixes.py b/esmvalcore/cmor/_fixes/icon/_base_fixes.py index 088ec110ce..8030190e97 100644 --- a/esmvalcore/cmor/_fixes/icon/_base_fixes.py +++ b/esmvalcore/cmor/_fixes/icon/_base_fixes.py @@ -1,4 +1,5 @@ """Fix base classes for ICON on-the-fly CMORizer.""" + from __future__ import annotations import logging @@ -31,10 +32,10 @@ class IconFix(NativeDatasetFix): """ - CACHE_DIR = Path.home() / '.esmvaltool' / 'cache' + CACHE_DIR = Path.home() / ".esmvaltool" / "cache" CACHE_VALIDITY = 7 * 24 * 60 * 60 # [s]; = 1 week TIMEOUT = 5 * 60 # [s]; = 5 min - GRID_FILE_ATTR = 'grid_file_uri' + GRID_FILE_ATTR = "grid_file_uri" def __init__(self, *args, **kwargs): """Initialize ICON fix.""" @@ -69,7 +70,8 @@ def _create_mesh(self, cube: Cube) -> MeshXY: # 'vertex_of_cell'; since UGRID expects a different dimension ordering # we transpose the cube here) vertex_of_cell = horizontal_grid.extract_cube( - NameConstraint(var_name='vertex_of_cell')) + NameConstraint(var_name="vertex_of_cell") + ) vertex_of_cell.transpose() # Extract start index used to name nodes from the the horizontal grid @@ -78,8 +80,8 @@ def _create_mesh(self, cube: Cube) -> MeshXY: # Extract face coordinates from cube (in ICON jargon called 'cell # latitude' and 'cell longitude') - face_lat = cube.coord('latitude') - face_lon = cube.coord('longitude') + face_lat = cube.coord("latitude") + face_lon = cube.coord("longitude") # Extract node coordinates from horizontal grid (node_lat, node_lon) = self._get_node_coords(horizontal_grid) @@ -91,11 +93,11 @@ def _create_mesh(self, cube: Cube) -> MeshXY: # Latitude: there might be slight numerical differences (-> check that # the differences are very small before fixing it) - close_kwargs = {'rtol': 1e-3, 'atol': 1e-5} + 
close_kwargs = {"rtol": 1e-3, "atol": 1e-5} if not np.allclose( - face_lat.bounds, - node_lat.points[conn_node_inds], - **close_kwargs, # type: ignore + face_lat.bounds, + node_lat.points[conn_node_inds], + **close_kwargs, # type: ignore ): logger.warning( "Latitude bounds of the face coordinate ('clat_vertices' in " @@ -131,15 +133,15 @@ def _create_mesh(self, cube: Cube) -> MeshXY: # Create mesh connectivity = Connectivity( indices=vertex_of_cell.data, - cf_role='face_node_connectivity', + cf_role="face_node_connectivity", start_index=start_index, location_axis=0, ) mesh = MeshXY( topology_dimension=2, - node_coords_and_axes=[(node_lat, 'y'), (node_lon, 'x')], + node_coords_and_axes=[(node_lat, "y"), (node_lon, "x")], connectivities=[connectivity], - face_coords_and_axes=[(face_lat, 'y'), (face_lon, 'x')], + face_coords_and_axes=[(face_lat, "y"), (face_lon, "x")], ) return mesh @@ -150,7 +152,8 @@ def _get_grid_url(self, cube): raise ValueError( f"Cube does not contain the attribute '{self.GRID_FILE_ATTR}' " f"necessary to download the ICON horizontal grid file:\n" - f"{cube}") + f"{cube}" + ) grid_url = cube.attributes[self.GRID_FILE_ATTR] parsed_url = urlparse(grid_url) grid_name = Path(parsed_url.path).name @@ -166,21 +169,22 @@ def _get_node_coords(self, horizontal_grid): """ dual_area_cube = horizontal_grid.extract_cube( - NameConstraint(var_name='dual_area')) - node_lat = dual_area_cube.coord(var_name='vlat') - node_lon = dual_area_cube.coord(var_name='vlon') + NameConstraint(var_name="dual_area") + ) + node_lat = dual_area_cube.coord(var_name="vlat") + node_lon = dual_area_cube.coord(var_name="vlon") # Fix metadata node_lat.bounds = None node_lon.bounds = None - node_lat.var_name = 'nlat' - node_lon.var_name = 'nlon' - node_lat.standard_name = 'latitude' - node_lon.standard_name = 'longitude' - node_lat.long_name = 'node latitude' - node_lon.long_name = 'node longitude' - node_lat.convert_units('degrees_north') - node_lon.convert_units('degrees_east') + node_lat.var_name = "nlat" + node_lon.var_name = "nlon" + node_lat.standard_name = "latitude" + node_lon.standard_name = "longitude" + node_lat.long_name = "node latitude" + node_lon.long_name = "node longitude" + node_lat.convert_units("degrees_north") + node_lon.convert_units("degrees_east") # Convert longitude to [0, 360] self._set_range_in_0_360(node_lon) @@ -190,10 +194,10 @@ def _get_node_coords(self, horizontal_grid): def _get_path_from_facet(self, facet, description=None): """Try to get path from facet.""" if description is None: - description = 'File' + description = "File" path = Path(os.path.expandvars(self.extra_facets[facet])).expanduser() if not path.is_file(): - new_path = self.session['auxiliary_data_dir'] / path + new_path = self.session["auxiliary_data_dir"] / path if not new_path.is_file(): raise FileNotFoundError( f"{description} '{path}' given by facet '{facet}' does " @@ -243,8 +247,8 @@ def add_additional_cubes(self, cubes): """ facets_to_consider = [ - 'zg_file', - 'zghalf_file', + "zg_file", + "zghalf_file", ] for facet in facets_to_consider: if self.extra_facets.get(facet) is None: @@ -259,7 +263,7 @@ def add_additional_cubes(self, cubes): def _get_grid_from_facet(self): """Get horizontal grid from user-defined facet `horizontal_grid`.""" grid_path = self._get_path_from_facet( - 'horizontal_grid', 'Horizontal grid file' + "horizontal_grid", "Horizontal grid file" ) grid_name = grid_path.name @@ -302,7 +306,7 @@ def _get_grid_from_cube_attr(self, cube: Cube) -> Cube: def _get_grid_from_rootpath(self, 
grid_name: str) -> CubeList | None: """Try to get grid from the ICON rootpath.""" glob_patterns: list[Path] = [] - for data_source in _get_data_sources('ICON'): + for data_source in _get_data_sources("ICON"): glob_patterns.extend( data_source.get_glob_patterns(**self.extra_facets) ) @@ -339,8 +343,10 @@ def _get_downloaded_grid(self, grid_url: str, grid_name: str) -> CubeList: logger.debug("Using cached ICON grid file '%s'", grid_path) valid_cache = True else: - logger.debug("Existing cached ICON grid file '%s' is outdated", - grid_path) + logger.debug( + "Existing cached ICON grid file '%s' is outdated", + grid_path, + ) # File is not present in cache or too old -> download it if not valid_cache: @@ -352,12 +358,12 @@ def _get_downloaded_grid(self, grid_url: str, grid_name: str) -> CubeList: tmp_path, ) with requests.get( - grid_url, - stream=True, - timeout=self.TIMEOUT, + grid_url, + stream=True, + timeout=self.TIMEOUT, ) as response: response.raise_for_status() - with tmp_path.open('wb') as file: + with tmp_path.open("wb") as file: copyfileobj(response.raw, file) shutil.move(tmp_path, grid_path) logger.info( @@ -408,7 +414,7 @@ def get_horizontal_grid(self, cube): file. """ - if self.extra_facets.get('horizontal_grid') is not None: + if self.extra_facets.get("horizontal_grid") is not None: grid = self._get_grid_from_facet() else: grid = self._get_grid_from_cube_attr(cube) @@ -449,9 +455,9 @@ def get_mesh(self, cube): """ # If specified by the user, use `horizontal_grid` facet to determine # grid name; otherwise, use the `grid_file_uri` attribute of the cube - if self.extra_facets.get('horizontal_grid') is not None: + if self.extra_facets.get("horizontal_grid") is not None: grid_path = self._get_path_from_facet( - 'horizontal_grid', 'Horizontal grid file' + "horizontal_grid", "Horizontal grid file" ) grid_name = grid_path.name else: @@ -479,7 +485,8 @@ def _get_start_index(horizontal_grid): """ vertex_index = horizontal_grid.extract_cube( - NameConstraint(var_name='vertex_index')) + NameConstraint(var_name="vertex_index") + ) return np.int32(np.min(vertex_index.data)) @staticmethod @@ -487,24 +494,24 @@ def _load_cubes(path: Path | str) -> CubeList: """Load cubes and ignore certain warnings.""" with warnings.catch_warnings(): warnings.filterwarnings( - 'ignore', + "ignore", message="Ignoring netCDF variable .* invalid units .*", category=UserWarning, - module='iris', + module="iris", ) # iris < 3.8 warnings.filterwarnings( - 'ignore', + "ignore", message="Ignoring invalid units .* on netCDF variable .*", category=UserWarning, - module='iris', + module="iris", ) # iris >= 3.8 warnings.filterwarnings( - 'ignore', + "ignore", message="Failed to create 'height' dimension coordinate: The " - "'height' DimCoord bounds array must be strictly " - "monotonic.", + "'height' DimCoord bounds array must be strictly " + "monotonic.", category=UserWarning, - module='iris', + module="iris", ) cubes = iris.load(path) return cubes diff --git a/esmvalcore/cmor/_fixes/icon/icon.py b/esmvalcore/cmor/_fixes/icon/icon.py index f8a9380e13..5fd7a926a0 100644 --- a/esmvalcore/cmor/_fixes/icon/icon.py +++ b/esmvalcore/cmor/_fixes/icon/icon.py @@ -1,4 +1,5 @@ """CMOR-like reformatting of ICON-A (ECHAM physics).""" + import logging import warnings from datetime import datetime, timedelta @@ -29,38 +30,39 @@ def fix_metadata(self, cubes): cube = self.get_cube(cubes) # Fix time - if self.vardef.has_coord_with_standard_name('time'): + if self.vardef.has_coord_with_standard_name("time"): cube = self._fix_time(cube, 
cubes) # Fix height (note: cannot use "if 'height' in self.vardef.dimensions" # here since the name of the z-coord varies from variable to variable) - if cube.coords('height'): + if cube.coords("height"): # In case a scalar height is required, remove it here (it is added # at a later stage). The step _fix_height() is designed to fix # non-scalar height coordinates. - if (cube.coord('height').shape[0] == 1 and ( - 'height2m' in self.vardef.dimensions or - 'height10m' in self.vardef.dimensions)): + if cube.coord("height").shape[0] == 1 and ( + "height2m" in self.vardef.dimensions + or "height10m" in self.vardef.dimensions + ): # If height is a dimensional coordinate with length 1, squeeze # the cube. # Note: iris.util.squeeze is not used here since it might # accidentally squeeze other dimensions. - if cube.coords('height', dim_coords=True): + if cube.coords("height", dim_coords=True): slices = [slice(None)] * cube.ndim - slices[cube.coord_dims('height')[0]] = 0 + slices[cube.coord_dims("height")[0]] = 0 cube = cube[tuple(slices)] - cube.remove_coord('height') + cube.remove_coord("height") else: cube = self._fix_height(cube, cubes) # Fix latitude - if self.vardef.has_coord_with_standard_name('latitude'): + if self.vardef.has_coord_with_standard_name("latitude"): lat_idx = self._fix_lat(cube) else: lat_idx = None # Fix longitude - if self.vardef.has_coord_with_standard_name('longitude'): + if self.vardef.has_coord_with_standard_name("longitude"): lon_idx = self._fix_lon(cube) else: lon_idx = None @@ -104,13 +106,14 @@ def _add_coord_from_grid_file(self, cube, coord_name): # The following dict maps from desired coordinate name in output file # (dict keys) to coordinate name in grid file (dict values) coord_names_mapping = { - 'latitude': 'grid_latitude', - 'longitude': 'grid_longitude', + "latitude": "grid_latitude", + "longitude": "grid_longitude", } if coord_name not in coord_names_mapping: raise ValueError( f"coord_name must be one of {list(coord_names_mapping)}, got " - f"'{coord_name}'") + f"'{coord_name}'" + ) coord_name_in_grid = coord_names_mapping[coord_name] # Use 'cell_area' as dummy cube to extract desired coordinates @@ -118,7 +121,8 @@ def _add_coord_from_grid_file(self, cube, coord_name): # supported horizontal_grid = self.get_horizontal_grid(cube) grid_cube = horizontal_grid.extract_cube( - NameConstraint(var_name='cell_area')) + NameConstraint(var_name="cell_area") + ) coord = grid_cube.coord(coord_name_in_grid) # Find index of mesh dimension (= single unnamed dimension) @@ -127,7 +131,8 @@ def _add_coord_from_grid_file(self, cube, coord_name): raise ValueError( f"Cannot determine coordinate dimension for coordinate " f"'{coord_name}', cube does not contain a single unnamed " - f"dimension:\n{cube}") + f"dimension:\n{cube}" + ) coord_dims = () for idx in range(cube.ndim): if not cube.coords(dimensions=idx, dim_coords=True): @@ -144,21 +149,22 @@ def _add_time(self, cube, cubes): """Add time coordinate from other cube in cubes.""" # Try to find time cube from other cubes and it to target cube for other_cube in cubes: - if not other_cube.coords('time'): + if not other_cube.coords("time"): continue - time_coord = other_cube.coord('time') + time_coord = other_cube.coord("time") cube = add_leading_dim_to_cube(cube, time_coord) return cube raise ValueError( f"Cannot add required coordinate 'time' to variable " f"'{self.vardef.short_name}', cube and other cubes in file do not " - f"contain it") + f"contain it" + ) def _get_z_coord(self, cubes, points_name, bounds_name=None): """Get 
z-coordinate without metadata (reversed).""" points_cube = iris.util.reverse( cubes.extract_cube(NameConstraint(var_name=points_name)), - 'height', + "height", ) points = points_cube.core_data() @@ -166,7 +172,7 @@ def _get_z_coord(self, cubes, points_name, bounds_name=None): if bounds_name is not None: bounds_cube = iris.util.reverse( cubes.extract_cube(NameConstraint(var_name=bounds_name)), - 'height', + "height", ) bounds = bounds_cube.core_data() bounds = da.stack( @@ -186,33 +192,33 @@ def _fix_height(self, cube, cubes): """Fix height coordinate of cube.""" # Reverse entire cube along height axis so that index 0 is surface # level - cube = iris.util.reverse(cube, 'height') + cube = iris.util.reverse(cube, "height") # If possible, extract reversed air_pressure coordinate from list of # cubes and add it to cube # Note: pfull/phalf have dimensions (time, height, spatial_dim) - if cubes.extract(NameConstraint(var_name='pfull')): - if cubes.extract(NameConstraint(var_name='phalf')): - phalf = 'phalf' + if cubes.extract(NameConstraint(var_name="pfull")): + if cubes.extract(NameConstraint(var_name="phalf")): + phalf = "phalf" else: phalf = None - plev_coord = self._get_z_coord(cubes, 'pfull', bounds_name=phalf) + plev_coord = self._get_z_coord(cubes, "pfull", bounds_name=phalf) self.fix_plev_metadata(cube, plev_coord) cube.add_aux_coord(plev_coord, np.arange(cube.ndim)) - elif cubes.extract(NameConstraint(var_name='pres')): - plev_coord = self._get_z_coord(cubes, 'pres') + elif cubes.extract(NameConstraint(var_name="pres")): + plev_coord = self._get_z_coord(cubes, "pres") self.fix_plev_metadata(cube, plev_coord) cube.add_aux_coord(plev_coord, np.arange(cube.ndim)) # If possible, extract reversed altitude coordinate from list of cubes # and add it to cube # Note: zg/zghalf have dimensions (height, spatial_dim) - if cubes.extract(NameConstraint(var_name='zg')): - if cubes.extract(NameConstraint(var_name='zghalf')): - zghalf = 'zghalf' + if cubes.extract(NameConstraint(var_name="zg")): + if cubes.extract(NameConstraint(var_name="zghalf")): + zghalf = "zghalf" else: zghalf = None - alt_coord = self._get_z_coord(cubes, 'zg', bounds_name=zghalf) + alt_coord = self._get_z_coord(cubes, "zg", bounds_name=zghalf) self.fix_alt16_metadata(cube, alt_coord) # Altitude coordinate only spans height and spatial dimensions (no @@ -220,15 +226,15 @@ def _fix_height(self, cube, cubes): cube.add_aux_coord(alt_coord, np.arange(cube.ndim)[-2:]) # Fix metadata - z_coord = cube.coord('height') - if z_coord.units.is_convertible('m'): + z_coord = cube.coord("height") + if z_coord.units.is_convertible("m"): self.fix_height_metadata(cube, z_coord) else: - z_coord.var_name = 'model_level' + z_coord.var_name = "model_level" z_coord.standard_name = None - z_coord.long_name = 'model level number' - z_coord.units = 'no unit' - z_coord.attributes['positive'] = 'up' + z_coord.long_name = "model level number" + z_coord.units = "no unit" + z_coord.attributes["positive"] = "up" z_coord.points = np.arange(len(z_coord.points)) z_coord.bounds = None @@ -236,12 +242,12 @@ def _fix_height(self, cube, cubes): def _fix_lat(self, cube): """Fix latitude coordinate of cube.""" - lat_name = self.extra_facets.get('latitude', 'latitude') + lat_name = self.extra_facets.get("latitude", "latitude") # Add latitude coordinate if not already present if not cube.coords(lat_name): try: - self._add_coord_from_grid_file(cube, 'latitude') + self._add_coord_from_grid_file(cube, "latitude") except Exception as exc: msg = "Failed to add missing latitude 
coordinate to cube" raise ValueError(msg) from exc @@ -253,12 +259,12 @@ def _fix_lat(self, cube): def _fix_lon(self, cube): """Fix longitude coordinate of cube.""" - lon_name = self.extra_facets.get('longitude', 'longitude') + lon_name = self.extra_facets.get("longitude", "longitude") # Add longitude coordinate if not already present if not cube.coords(lon_name): try: - self._add_coord_from_grid_file(cube, 'longitude') + self._add_coord_from_grid_file(cube, "longitude") except Exception as exc: msg = "Failed to add missing longitude coordinate to cube" raise ValueError(msg) from exc @@ -272,7 +278,7 @@ def _fix_lon(self, cube): def _fix_time(self, cube, cubes): """Fix time coordinate of cube.""" # Add time coordinate if not already present - if not cube.coords('time'): + if not cube.coords("time"): cube = self._add_time(cube, cubes) # Fix metadata @@ -280,14 +286,14 @@ def _fix_time(self, cube, cubes): # If necessary, convert invalid time units of the form "day as # %Y%m%d.%f" to CF format (e.g., "days since 1850-01-01") - if 'invalid_units' in time_coord.attributes: + if "invalid_units" in time_coord.attributes: self._fix_invalid_time_units(time_coord) # ICON usually reports aggregated values at the end of the time period, # e.g., for monthly output, ICON reports the month February as 1 March. # Thus, if not disabled, shift all time points back by 1/2 of the given # time period. - if self.extra_facets.get('shift_time', True): + if self.extra_facets.get("shift_time", True): self._shift_time_coord(cube, time_coord) # If not already present, try to add bounds here. Usually bounds are @@ -300,13 +306,15 @@ def _shift_time_coord(self, cube, time_coord): """Shift time points back by 1/2 of given time period (in-place).""" # Do not modify time coordinate for point measurements for cell_method in cube.cell_methods: - is_point_measurement = ('time' in cell_method.coord_names and - 'point' in cell_method.method) + is_point_measurement = ( + "time" in cell_method.coord_names + and "point" in cell_method.method + ) if is_point_measurement: logger.debug( "ICON data describes point measurements: time coordinate " "will not be shifted back by 1/2 of output interval (%s)", - self.extra_facets['frequency'], + self.extra_facets["frequency"], ) return @@ -314,11 +322,11 @@ def _shift_time_coord(self, cube, time_coord): time_coord.bounds = None # For decadal, yearly and monthly data, round datetimes to closest day - freq = self.extra_facets['frequency'] - if 'dec' in freq or 'yr' in freq or 'mon' in freq: + freq = self.extra_facets["frequency"] + if "dec" in freq or "yr" in freq or "mon" in freq: time_units = time_coord.units time_coord.convert_units( - Unit('days since 1850-01-01', calendar=time_units.calendar) + Unit("days since 1850-01-01", calendar=time_units.calendar) ) try: time_coord.points = np.around(time_coord.points) @@ -347,19 +355,19 @@ def _shift_time_coord(self, cube, time_coord): ([previous_time_point], time_coord.points) ) time_coord.points = ( - np.convolve(extended_time_points, np.ones(2), 'valid') / 2.0 + np.convolve(extended_time_points, np.ones(2), "valid") / 2.0 ) # running mean with window length 2 time_coord.bounds = np.stack( (extended_time_points[:-1], extended_time_points[1:]), axis=-1 ) logger.debug( "Shifted ICON time coordinate back by 1/2 of output interval (%s)", - self.extra_facets['frequency'], + self.extra_facets["frequency"], ) def _get_previous_timestep(self, datetime_point): """Get previous time step.""" - freq = self.extra_facets['frequency'] + freq = 
self.extra_facets["frequency"] year = datetime_point.year month = datetime_point.month @@ -369,12 +377,12 @@ def _get_previous_timestep(self, datetime_point): f"step for frequency '{freq}'. Use `shift_time=false` in the " f"recipe to disable this feature" ) - if 'fx' in freq or 'subhr' in freq: + if "fx" in freq or "subhr" in freq: raise ValueError(invalid_freq_error_msg) # For decadal, yearly and monthly data, the points needs to be the # first of the month 00:00:00 - if 'dec' in freq or 'yr' in freq or 'mon' in freq: + if "dec" in freq or "yr" in freq or "mon" in freq: if datetime_point != datetime(year, month, 1): raise ValueError( f"Cannot shift time coordinate: expected first of the " @@ -384,26 +392,26 @@ def _get_previous_timestep(self, datetime_point): ) # Decadal data - if 'dec' in freq: + if "dec" in freq: return datetime_point.replace(year=year - 10) # Yearly data - if 'yr' in freq: + if "yr" in freq: return datetime_point.replace(year=year - 1) # Monthly data - if 'mon' in freq: + if "mon" in freq: new_month = (month - 2) % 12 + 1 new_year = year + (month - 2) // 12 return datetime_point.replace(year=new_year, month=new_month) # Daily data - if 'day' in freq: + if "day" in freq: return datetime_point - timedelta(days=1) # Hourly data - if 'hr' in freq: - (n_hours, _, _) = freq.partition('hr') + if "hr" in freq: + (n_hours, _, _) = freq.partition("hr") if not n_hours: n_hours = 1 return datetime_point - timedelta(hours=int(n_hours)) @@ -421,20 +429,22 @@ def _fix_mesh(self, cube, mesh_idx): # Add dimensional coordinate that describes the mesh dimension index_coord = DimCoord( np.arange(cube.shape[mesh_idx[0]]), - var_name='i', - long_name=('first spatial index for variables stored on an ' - 'unstructured grid'), - units='1', + var_name="i", + long_name=( + "first spatial index for variables stored on an " + "unstructured grid" + ), + units="1", ) cube.add_dim_coord(index_coord, mesh_idx) # If desired, get mesh and replace the original latitude and longitude # coordinates with their new mesh versions - if self.extra_facets.get('ugrid', True): + if self.extra_facets.get("ugrid", True): mesh = self.get_mesh(cube) - cube.remove_coord('latitude') - cube.remove_coord('longitude') - for mesh_coord in mesh.to_MeshCoords('face'): + cube.remove_coord("latitude") + cube.remove_coord("longitude") + for mesh_coord in mesh.to_MeshCoords("face"): cube.add_aux_coord(mesh_coord, mesh_idx) @staticmethod @@ -465,15 +475,15 @@ def _fix_invalid_time_units(time_coord): # ICON data usually has no time bounds. To be 100% sure, we remove the # bounds here (they will be added at a later stage). time_coord.bounds = None - time_format = 'day as %Y%m%d.%f' - t_unit = time_coord.attributes.pop('invalid_units') + time_format = "day as %Y%m%d.%f" + t_unit = time_coord.attributes.pop("invalid_units") if t_unit != time_format: raise ValueError( f"Expected time units '{time_format}' in input file, got " f"'{t_unit}'" ) new_t_units = Unit( - 'days since 1850-01-01', calendar='proleptic_gregorian' + "days since 1850-01-01", calendar="proleptic_gregorian" ) # New routine to convert time of daily and hourly data. 
The string %f @@ -483,14 +493,14 @@ def _fix_invalid_time_units(time_coord): # First, extract date (year, month, day) from string and convert it to # datetime object - year_month_day_str = time_str.str.extract(r'(\d*)\.?\d*', expand=False) - year_month_day = pd.to_datetime(year_month_day_str, format='%Y%m%d') + year_month_day_str = time_str.str.extract(r"(\d*)\.?\d*", expand=False) + year_month_day = pd.to_datetime(year_month_day_str, format="%Y%m%d") # Second, extract day fraction and convert it to timedelta object day_float_str = time_str.str.extract( - r'\d*(\.\d*)', expand=False - ).fillna('0.0') - day_float = pd.to_timedelta(day_float_str.astype(float), unit='D') + r"\d*(\.\d*)", expand=False + ).fillna("0.0") + day_float = pd.to_timedelta(day_float_str.astype(float), unit="D") # Finally, add date and day fraction to get final datetime and convert # it to correct units. Note: we also round to next second, otherwise @@ -499,12 +509,12 @@ def _fix_invalid_time_units(time_coord): # pd.Series object directly is broken # (https://github.com/pandas-dev/pandas/issues/57002). datetimes = year_month_day + day_float - rounded_datetimes = pd.Series(dt.round('s') for dt in datetimes) + rounded_datetimes = pd.Series(dt.round("s") for dt in datetimes) with warnings.catch_warnings(): # We already fixed the deprecated code as recommended in the # warning, but it still shows up -> ignore it warnings.filterwarnings( - 'ignore', + "ignore", message="The behavior of DatetimeProperties.to_pydatetime .*", category=FutureWarning, ) @@ -521,9 +531,8 @@ class Clwvi(IconFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name='cllvi') + - self.get_cube(cubes, var_name='clivi') + cube = self.get_cube(cubes, var_name="cllvi") + self.get_cube( + cubes, var_name="clivi" ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -535,9 +544,9 @@ class Rtmt(IconFix): def fix_metadata(self, cubes): """Fix metadata.""" cube = ( - self.get_cube(cubes, var_name='rsdt') - - self.get_cube(cubes, var_name='rsut') - - self.get_cube(cubes, var_name='rlut') + self.get_cube(cubes, var_name="rsdt") + - self.get_cube(cubes, var_name="rsut") + - self.get_cube(cubes, var_name="rlut") ) cube.var_name = self.vardef.short_name return CubeList([cube]) diff --git a/esmvalcore/cmor/_fixes/icon/icon_seamless.py b/esmvalcore/cmor/_fixes/icon/icon_seamless.py index 9d6e9d0b31..a7d22492ef 100644 --- a/esmvalcore/cmor/_fixes/icon/icon_seamless.py +++ b/esmvalcore/cmor/_fixes/icon/icon_seamless.py @@ -1,4 +1,5 @@ """CMOR-like reformatting of ICON-Seamless (NWP physics).""" + import logging from iris.cube import CubeList @@ -40,9 +41,8 @@ class Rtmt(IconFix): def fix_metadata(self, cubes): """Fix metadata.""" - cube = ( - self.get_cube(cubes, var_name='sob_t') + - self.get_cube(cubes, var_name='thb_t') + cube = self.get_cube(cubes, var_name="sob_t") + self.get_cube( + cubes, var_name="thb_t" ) cube.var_name = self.vardef.short_name return CubeList([cube]) @@ -58,8 +58,8 @@ def fix_metadata(self, cubes): Z using Z = Phi / g0 (g0 is standard acceleration of gravity). 
""" - g0_value = constants.value('standard acceleration of gravity') - g0_units = constants.unit('standard acceleration of gravity') + g0_value = constants.value("standard acceleration of gravity") + g0_units = constants.unit("standard acceleration of gravity") cube = self.get_cube(cubes) cube.data = cube.core_data() / g0_value diff --git a/esmvalcore/cmor/check.py b/esmvalcore/cmor/check.py index df9ad0a027..9cd7cb388e 100644 --- a/esmvalcore/cmor/check.py +++ b/esmvalcore/cmor/check.py @@ -1,4 +1,5 @@ """Module for checking iris cubes against their CMOR definitions.""" + from __future__ import annotations import logging @@ -54,7 +55,7 @@ class CMORCheckError(Exception): """Exception raised when a cube does not pass the CMORCheck.""" -class CMORCheck(): +class CMORCheck: """Class used to check the CMOR-compliance of the data. Parameters @@ -90,20 +91,21 @@ class CMORCheck(): Expected frequency for the data. """ - _attr_msg = '{}: {} should be {}, not {}' - _does_msg = '{}: does not {}' - _is_msg = '{}: is not {}' - _vals_msg = '{}: has values {} {}' - _contain_msg = '{}: does not contain {} {}' - - def __init__(self, - cube, - var_info, - frequency=None, - fail_on_error=False, - check_level=CheckLevels.DEFAULT, - automatic_fixes=False): + _attr_msg = "{}: {} should be {}, not {}" + _does_msg = "{}: does not {}" + _is_msg = "{}: is not {}" + _vals_msg = "{}: has values {} {}" + _contain_msg = "{}: does not contain {} {}" + def __init__( + self, + cube, + var_info, + frequency=None, + fail_on_error=False, + check_level=CheckLevels.DEFAULT, + automatic_fixes=False, + ): self._cube = cube self._failerr = fail_on_error self._check_level = check_level @@ -178,7 +180,7 @@ def check_metadata(self, logger: Optional[logging.Logger] = None) -> Cube: self._check_multiple_coords_same_stdname() self._check_dim_names() self._check_coords() - if self.frequency != 'fx': + if self.frequency != "fx": self._check_time_coord() self._check_rank() @@ -239,37 +241,43 @@ def report_errors(self): If any errors were reported before calling this method. 
""" if self.has_errors(): - msg = '\n'.join([ - f'There were errors in variable {self._cube.var_name}:', - ' ' + '\n '.join(self._errors), - 'in cube:', - f'{self._cube}', - 'loaded from file ' + - self._cube.attributes.get('source_file', ''), - ]) + msg = "\n".join( + [ + f"There were errors in variable {self._cube.var_name}:", + " " + "\n ".join(self._errors), + "in cube:", + f"{self._cube}", + "loaded from file " + + self._cube.attributes.get("source_file", ""), + ] + ) raise CMORCheckError(msg) def report_warnings(self): """Report detected warnings to the given logger.""" if self.has_warnings(): - msg = '\n'.join([ - f'There were warnings in variable {self._cube.var_name}:', - ' ' + '\n '.join(self._warnings), - 'loaded from file ' + - self._cube.attributes.get('source_file', ''), - ]) + msg = "\n".join( + [ + f"There were warnings in variable {self._cube.var_name}:", + " " + "\n ".join(self._warnings), + "loaded from file " + + self._cube.attributes.get("source_file", ""), + ] + ) self._logger.warning(msg) def report_debug_messages(self): """Report detected debug messages to the given logger.""" if self.has_debug_messages(): - msg = '\n'.join([ - f'There were metadata changes in variable ' - f'{self._cube.var_name}:', - ' ' + '\n '.join(self._debug_messages), - 'loaded from file ' + - self._cube.attributes.get('source_file', ''), - ]) + msg = "\n".join( + [ + f"There were metadata changes in variable " + f"{self._cube.var_name}:", + " " + "\n ".join(self._debug_messages), + "loaded from file " + + self._cube.attributes.get("source_file", ""), + ] + ) self._logger.debug(msg) def _check_fill_value(self): @@ -285,43 +293,61 @@ def _check_var_metadata(self): # Check standard_name if self._cmor_var.standard_name: if self._cube.standard_name != self._cmor_var.standard_name: - self.report_error(self._attr_msg, self._cube.var_name, - 'standard_name', - self._cmor_var.standard_name, - self._cube.standard_name) + self.report_error( + self._attr_msg, + self._cube.var_name, + "standard_name", + self._cmor_var.standard_name, + self._cube.standard_name, + ) # Check long_name if self._cmor_var.long_name: if self._cube.long_name != self._cmor_var.long_name: - self.report_error(self._attr_msg, self._cube.var_name, - 'long_name', self._cmor_var.long_name, - self._cube.long_name) + self.report_error( + self._attr_msg, + self._cube.var_name, + "long_name", + self._cmor_var.long_name, + self._cube.long_name, + ) # Check units if self._cmor_var.units: units = self._get_effective_units() if self._cube.units != units: - self.report_error(self._attr_msg, self._cube.var_name, - 'units', self._cmor_var.units, - self._cube.units) + self.report_error( + self._attr_msg, + self._cube.var_name, + "units", + self._cmor_var.units, + self._cube.units, + ) # Check other variable attributes that match entries in cube.attributes - attrs = ('positive', ) + attrs = ("positive",) for attr in attrs: attr_value = getattr(self._cmor_var, attr) if attr_value: if attr not in self._cube.attributes: - self.report_warning('{}: attribute {} not present', - self._cube.var_name, attr) + self.report_warning( + "{}: attribute {} not present", + self._cube.var_name, + attr, + ) elif self._cube.attributes[attr] != attr_value: - self.report_error(self._attr_msg, self._cube.var_name, - attr, attr_value, - self._cube.attributes[attr]) + self.report_error( + self._attr_msg, + self._cube.var_name, + attr, + attr_value, + self._cube.attributes[attr], + ) def _get_effective_units(self): """Get effective units.""" # TODO: remove entire function in 
v2.12 - if self._cmor_var.units.lower() == 'psu': - units = '1.0' + if self._cmor_var.units.lower() == "psu": + units = "1.0" else: units = self._cmor_var.units return units @@ -344,8 +370,9 @@ def _check_rank(self): # Check number of dimension coords matches rank if self._cube.ndim != rank: - self.report_error(self._does_msg, self._cube.var_name, - 'match coordinate rank') + self.report_error( + self._does_msg, self._cube.var_name, "match coordinate rank" + ) def _check_multiple_coords_same_stdname(self): standard_names = set() @@ -353,67 +380,76 @@ def _check_multiple_coords_same_stdname(self): if coord.standard_name: if coord.standard_name in standard_names: coords = [ - c.var_name for c in self._cube.coords( - standard_name=coord.standard_name) + c.var_name + for c in self._cube.coords( + standard_name=coord.standard_name + ) ] self.report_error( - 'There are multiple coordinates with ' - f'standard_name "{coord.standard_name}": {coords}') + "There are multiple coordinates with " + f'standard_name "{coord.standard_name}": {coords}' + ) else: standard_names.add(coord.standard_name) def _check_dim_names(self): """Check dimension names.""" cmor_var_coordinates = self._cmor_var.coordinates.copy() - link = 'https://github.com/ESMValGroup/ESMValCore/discussions/1587' - for (key, coordinate) in cmor_var_coordinates.items(): + link = "https://github.com/ESMValGroup/ESMValCore/discussions/1587" + for key, coordinate in cmor_var_coordinates.items(): if coordinate.generic_level: self._check_generic_level_dim_names(key, coordinate) else: try: cube_coord = self._cube.coord(var_name=coordinate.out_name) - if (cube_coord.standard_name is None - and coordinate.standard_name == ''): + if ( + cube_coord.standard_name is None + and coordinate.standard_name == "" + ): pass elif cube_coord.standard_name != coordinate.standard_name: self.report_critical( self._attr_msg, coordinate.out_name, - 'standard_name', + "standard_name", coordinate.standard_name, cube_coord.standard_name, ) except iris.exceptions.CoordinateNotFoundError: try: coord = self._cube.coord(coordinate.standard_name) - if coord.standard_name in ['region', 'area_type']: + if coord.standard_name in ["region", "area_type"]: self.report_debug_message( - 'Coordinate {0} has var name {1} ' - 'instead of {2}. ' + "Coordinate {0} has var name {1} " + "instead of {2}. " "But that's considered OK and ignored. 
" - 'See also {3}', + "See also {3}", coordinate.name, coord.var_name, coordinate.out_name, - link + link, ) else: self.report_error( - 'Coordinate {0} has var name {1} ' - 'instead of {2}', + "Coordinate {0} has var name {1} " + "instead of {2}", coordinate.name, coord.var_name, coordinate.out_name, ) except iris.exceptions.CoordinateNotFoundError: - if coordinate.standard_name in ['time', 'latitude', - 'longitude'] or \ - coordinate.requested: - self.report_critical(self._does_msg, - coordinate.name, 'exist') + if ( + coordinate.standard_name + in ["time", "latitude", "longitude"] + or coordinate.requested + ): + self.report_critical( + self._does_msg, coordinate.name, "exist" + ) else: - self.report_error(self._does_msg, coordinate.name, - 'exist') + self.report_error( + self._does_msg, coordinate.name, "exist" + ) def _check_generic_level_dim_names(self, key, coordinate): """Check name of generic level coordinate.""" @@ -424,19 +460,23 @@ def _check_generic_level_dim_names(self, key, coordinate): if standard_name: if not out_name: self.report_error( - f'Generic level coordinate {key} has wrong var_name.') + f"Generic level coordinate {key} has wrong var_name." + ) level = _get_new_generic_level_coord( self._cmor_var, coordinate, key, name ) self._cmor_var.coordinates[key] = level - self.report_debug_message(f'Generic level coordinate {key} ' - 'will be checked against ' - f'{name} coordinate information') + self.report_debug_message( + f"Generic level coordinate {key} " + "will be checked against " + f"{name} coordinate information" + ) else: if out_name: self.report_critical( - f'Generic level coordinate {key} with out_name ' - f'{out_name} has wrong standard_name or is not set.') + f"Generic level coordinate {key} with out_name " + f"{out_name} has wrong standard_name or is not set." + ) else: self._check_alternative_dim_names(key) @@ -473,14 +513,15 @@ def _check_alternative_dim_names(self, key): values might be disabled. """ try: - (alternative_coord, - cube_coord) = _get_alternative_generic_lev_coord( - self._cube, key, self._cmor_var.table_type + (alternative_coord, cube_coord) = ( + _get_alternative_generic_lev_coord( + self._cube, key, self._cmor_var.table_type + ) ) # No valid alternative coordinate found -> critical error except ValueError: - self.report_critical(self._does_msg, key, 'exist') + self.report_critical(self._does_msg, key, "exist") return # Wrong standard_name -> error @@ -498,7 +539,8 @@ def _check_alternative_dim_names(self, key): f"Found alternative coordinate '{alternative_coord.out_name}' " f"for generic level coordinate '{key}'. Subsequent warnings about " f"levels that are not contained in '{alternative_coord.out_name}' " - f"can be safely ignored.") + f"can be safely ignored." 
+ ) self._check_coord(alternative_coord, cube_coord, cube_coord.var_name) def _check_coords(self): @@ -523,13 +565,13 @@ def _check_coords(self): def _check_coord_ranges(self, coords: list[tuple[CoordinateInfo, Coord]]): """Check coordinate value are inside valid ranges.""" - Limit = namedtuple('Limit', ['name', 'type', 'limit', 'value']) + Limit = namedtuple("Limit", ["name", "type", "limit", "value"]) limits = [] for coord_info, coord in coords: points = coord.core_points() - for limit_type in 'min', 'max': - valid = getattr(coord_info, f'valid_{limit_type}') + for limit_type in "min", "max": + valid = getattr(coord_info, f"valid_{limit_type}") if valid != "": limit = Limit( name=coord_info.out_name, @@ -541,12 +583,14 @@ def _check_coord_ranges(self, coords: list[tuple[CoordinateInfo, Coord]]): limits = dask.compute(*limits) for limit in limits: - if limit.type == 'min' and limit.value < limit.limit: - self.report_critical(self._vals_msg, limit.name, - '< valid_min =', limit.limit) - if limit.type == 'max' and limit.value > limit.limit: - self.report_critical(self._vals_msg, limit.name, - '> valid_max =', limit.limit) + if limit.type == "min" and limit.value < limit.limit: + self.report_critical( + self._vals_msg, limit.name, "< valid_min =", limit.limit + ) + if limit.type == "max" and limit.value > limit.limit: + self.report_critical( + self._vals_msg, limit.name, "> valid_max =", limit.limit + ) def _check_coords_data(self): """Check coordinate data.""" @@ -569,68 +613,78 @@ def _check_coords_data(self): ) self._check_coord_monotonicity_and_direction( - coordinate, coord, var_name) + coordinate, coord, var_name + ) def _check_coord(self, cmor, coord, var_name): """Check single coordinate.""" - if coord.var_name == 'time': + if coord.var_name == "time": return if cmor.units: if str(coord.units) != cmor.units: - self.report_critical(self._attr_msg, var_name, 'units', - cmor.units, coord.units) + self.report_critical( + self._attr_msg, var_name, "units", cmor.units, coord.units + ) self._check_coord_points(cmor, coord, var_name) def _check_coord_bounds(self, cmor, coord, var_name): - if cmor.must_have_bounds == 'yes' and not coord.has_bounds(): + if cmor.must_have_bounds == "yes" and not coord.has_bounds(): self.report_warning( - 'Coordinate {0} from var {1} does not have bounds', - coord.var_name, var_name) + "Coordinate {0} from var {1} does not have bounds", + coord.var_name, + var_name, + ) def _check_time_bounds(self, time): - times = {'time', 'time1', 'time2', 'time3'} + times = {"time", "time1", "time2", "time3"} key = times.intersection(self._cmor_var.coordinates) cmor = self._cmor_var.coordinates[" ".join(key)] - if cmor.must_have_bounds == 'yes' and not time.has_bounds(): + if cmor.must_have_bounds == "yes" and not time.has_bounds(): self.report_warning( - 'Coordinate {0} from var {1} does not have bounds', - time.var_name, self._cmor_var.short_name) + "Coordinate {0} from var {1} does not have bounds", + time.var_name, + self._cmor_var.short_name, + ) def _check_coord_monotonicity_and_direction(self, cmor, coord, var_name): """Check monotonicity and direction of coordinate.""" if coord.ndim > 1: return - if coord.dtype.kind == 'U': + if coord.dtype.kind == "U": return - if (self._unstructured_grid and - coord.standard_name in ['latitude', 'longitude']): + if self._unstructured_grid and coord.standard_name in [ + "latitude", + "longitude", + ]: self.report_debug_message( - f'Coordinate {coord.standard_name} appears to belong to ' - 'an unstructured grid. 
Skipping monotonicity and ' - 'direction tests.') + f"Coordinate {coord.standard_name} appears to belong to " + "an unstructured grid. Skipping monotonicity and " + "direction tests." + ) return if not coord.is_monotonic(): - self.report_critical(self._is_msg, var_name, 'monotonic') + self.report_critical(self._is_msg, var_name, "monotonic") if len(coord.core_points()) == 1: return if cmor.stored_direction: - if cmor.stored_direction == 'increasing': + if cmor.stored_direction == "increasing": if coord.core_points()[0] > coord.core_points()[1]: - self.report_critical(self._is_msg, var_name, 'increasing') - elif cmor.stored_direction == 'decreasing': + self.report_critical(self._is_msg, var_name, "increasing") + elif cmor.stored_direction == "decreasing": if coord.core_points()[0] < coord.core_points()[1]: - self.report_critical(self._is_msg, var_name, 'decreasing') + self.report_critical(self._is_msg, var_name, "decreasing") def _check_coord_points(self, coord_info, coord, var_name): """Check coordinate points: values, bounds and monotonicity.""" self._check_requested_values(coord, coord_info, var_name) self._check_coord_bounds(coord_info, coord, var_name) - self._check_coord_monotonicity_and_direction(coord_info, coord, - var_name) + self._check_coord_monotonicity_and_direction( + coord_info, coord, var_name + ) def _check_requested_values(self, coord, coord_info, var_name): """Check requested values.""" @@ -638,7 +692,10 @@ def _check_requested_values(self, coord, coord_info, var_name): if coord.core_points().ndim != 1: self.report_debug_message( "Cannot check requested values of {}D coordinate {} since " - "it is not 1D", coord.core_points().ndim, var_name) + "it is not 1D", + coord.core_points().ndim, + var_name, + ) return try: cmor_points = np.array(coord_info.requested, dtype=float) @@ -646,52 +703,60 @@ def _check_requested_values(self, coord, coord_info, var_name): cmor_points = coord_info.requested for point in cmor_points: if point not in coord.core_points(): - self.report_warning(self._contain_msg, var_name, - str(point), str(coord.units)) + self.report_warning( + self._contain_msg, + var_name, + str(point), + str(coord.units), + ) def _check_time_coord(self): """Check time coordinate.""" try: - coord = self._cube.coord('time', dim_coords=True) + coord = self._cube.coord("time", dim_coords=True) except iris.exceptions.CoordinateNotFoundError: try: - coord = self._cube.coord('time') + coord = self._cube.coord("time") except iris.exceptions.CoordinateNotFoundError: return var_name = coord.var_name if not coord.is_monotonic(): - self.report_error('Time coordinate for var {} is not monotonic', - var_name) + self.report_error( + "Time coordinate for var {} is not monotonic", var_name + ) if not coord.units.is_time_reference(): - self.report_critical(self._does_msg, var_name, - 'have time reference units') + self.report_critical( + self._does_msg, var_name, "have time reference units" + ) else: simplified_cal = _get_simplified_calendar(coord.units.calendar) attrs = self._cube.attributes - parent_time = 'parent_time_units' + parent_time = "parent_time_units" if parent_time in attrs: - if attrs[parent_time] in 'no parent': + if attrs[parent_time] in "no parent": pass else: try: cf_units.Unit(attrs[parent_time], simplified_cal) except ValueError: - self.report_warning('Attribute parent_time_units has ' - 'a wrong format and cannot be ' - 'read by cf_units. 
A fix needs to ' - 'be added to convert properly ' - 'attributes branch_time_in_parent ' - 'and branch_time_in_child.') + self.report_warning( + "Attribute parent_time_units has " + "a wrong format and cannot be " + "read by cf_units. A fix needs to " + "be added to convert properly " + "attributes branch_time_in_parent " + "and branch_time_in_child." + ) # Check frequency tol = 0.001 - intervals = {'dec': (3600, 3660), 'day': (1, 1)} + intervals = {"dec": (3600, 3660), "day": (1, 1)} freq = self.frequency - if freq.lower().endswith('pt'): + if freq.lower().endswith("pt"): freq = freq[:-2] - if freq in ['mon', 'mo']: + if freq in ["mon", "mo"]: dates = coord.units.num2date(coord.points) for i in range(len(coord.points) - 1): first = dates[i] @@ -701,44 +766,45 @@ def _check_time_coord(self): if second_month == 13: second_month = 1 second_year += 1 - if second_month != second.month or \ - second_year != second.year: - msg = '{}: Frequency {} does not match input data' + if second_month != second.month or second_year != second.year: + msg = "{}: Frequency {} does not match input data" self.report_error(msg, var_name, freq) break - elif freq == 'yr': + elif freq == "yr": dates = coord.units.num2date(coord.points) for i in range(len(coord.points) - 1): first = dates[i] second = dates[i + 1] second_month = first.month + 1 if first.year + 1 != second.year: - msg = '{}: Frequency {} does not match input data' + msg = "{}: Frequency {} does not match input data" self.report_error(msg, var_name, freq) break else: if freq in intervals: interval = intervals[freq] target_interval = (interval[0] - tol, interval[1] + tol) - elif freq.endswith('hr'): - if freq == 'hr': - freq = '1hr' + elif freq.endswith("hr"): + if freq == "hr": + freq = "1hr" frequency = freq[:-2] - if frequency == 'sub': + if frequency == "sub": frequency = 1.0 / 24 target_interval = (-tol, frequency + tol) else: frequency = float(frequency) / 24 target_interval = (frequency - tol, frequency + tol) else: - msg = '{}: Frequency {} not supported by checker' + msg = "{}: Frequency {} not supported by checker" self.report_error(msg, var_name, freq) return for i in range(len(coord.points) - 1): interval = coord.points[i + 1] - coord.points[i] - if (interval < target_interval[0] - or interval > target_interval[1]): - msg = '{}: Frequency {} does not match input data' + if ( + interval < target_interval[0] + or interval > target_interval[1] + ): + msg = "{}: Frequency {} does not match input data" self.report_error(msg, var_name, freq) break @@ -805,8 +871,9 @@ def report(self, level, message, *args): self._warnings.append(msg) else: if self._failerr: - raise CMORCheckError(msg + - '\n in cube:\n{}'.format(self._cube)) + raise CMORCheckError( + msg + "\n in cube:\n{}".format(self._cube) + ) self._errors.append(msg) def report_critical(self, message, *args): @@ -874,12 +941,14 @@ def _get_cmor_checker( var_info = get_var_info(project, mip, short_name) def _checker(cube: Cube) -> CMORCheck: - return CMORCheck(cube, - var_info, - frequency=frequency, - fail_on_error=fail_on_error, - check_level=check_level, - automatic_fixes=automatic_fixes) + return CMORCheck( + cube, + var_info, + frequency=frequency, + fail_on_error=fail_on_error, + check_level=check_level, + automatic_fixes=automatic_fixes, + ) return _checker diff --git a/esmvalcore/local.py b/esmvalcore/local.py index 39e25338ce..de4c2b1b04 100644 --- a/esmvalcore/local.py +++ b/esmvalcore/local.py @@ -1,4 +1,5 @@ """Find files on the local filesystem.""" + from __future__ import 
annotations
 
 import itertools
@@ -37,13 +38,14 @@ def _get_from_pattern(pattern, date_range_pattern, stem, group):
     if not daterange:
         # Retry with extended context for CMIP3
         context = r"(?:^|[-_.]|$)"
-        date_range_pattern_with_context = (context + date_range_pattern +
-                                           context)
+        date_range_pattern_with_context = (
+            context + date_range_pattern + context
+        )
         daterange = re.search(date_range_pattern_with_context, stem)
         if daterange:
             start_point = daterange.group(group)
-            end_group = '_'.join([group, 'end'])
+            end_group = "_".join([group, "end"])
             end_point = daterange.group(end_group)
         else:
             # Check for single dates in the filename
@@ -53,8 +55,8 @@ def _get_from_pattern(pattern, date_range_pattern, stem, group):
             start_point = end_point = dates[0][0]
         elif len(dates) > 1:
             # Check for dates at start or (exclusive or) end of filename
-            start = re.search(r'^' + pattern, stem)
-            end = re.search(pattern + r'$', stem)
+            start = re.search(r"^" + pattern, stem)
+            end = re.search(pattern + r"$", stem)
             if start and not end:
                 start_point = end_point = start.group(group)
             elif end:
@@ -64,7 +66,8 @@
 
 
 def _get_start_end_date(
-        file: str | Path | LocalFile | ESGFFile) -> tuple[str, str]:
+    file: str | Path | LocalFile | ESGFFile,
+) -> tuple[str, str]:
     """Get the start and end dates as a string from a file name.
 
     Examples of allowed dates: 1980, 198001, 1980-01, 19801231, 1980-12-31,
@@ -93,7 +96,7 @@ def _get_start_end_date(
     ValueError
        Start or end date cannot be determined.
    """
-    if hasattr(file, 'name'):  # Path, LocalFile, ESGFFile
+    if hasattr(file, "name"):  # Path, LocalFile, ESGFFile
         stem = Path(file.name).stem
     else:  # str
         stem = Path(file).stem
@@ -101,59 +104,71 @@ def _get_start_end_date(
     start_date = end_date = None
 
     # Build regex
-    time_pattern = (r"(?P<hour>[0-2][0-9]"
-                    r"(?P<minute>[0-5][0-9]"
-                    r"(?P<second>[0-5][0-9])?)?Z?)")
-    date_pattern = (r"(?P<year>[0-9]{4})"
-                    r"(?P<month>-?[01][0-9]"
-                    r"(?P<day>-?[0-3][0-9]"
-                    rf"(T?{time_pattern})?)?)?")
-    datetime_pattern = (rf"(?P<datetime>{date_pattern})")
+    time_pattern = (
+        r"(?P<hour>[0-2][0-9]"
+        r"(?P<minute>[0-5][0-9]"
+        r"(?P<second>[0-5][0-9])?)?Z?)"
+    )
+    date_pattern = (
+        r"(?P<year>[0-9]{4})"
+        r"(?P<month>-?[01][0-9]"
+        r"(?P<day>-?[0-3][0-9]"
+        rf"(T?{time_pattern})?)?)?"
+    )
+    datetime_pattern = rf"(?P<datetime>{date_pattern})"
     end_datetime_pattern = datetime_pattern.replace(">", "_end>")
 
     # Dates can either be delimited by '-', '_', or '_cat_' (the latter for
     # CMIP3)
-    date_range_pattern = (datetime_pattern + r"[-_](?:cat_)?" +
-                          end_datetime_pattern)
+    date_range_pattern = (
+        datetime_pattern + r"[-_](?:cat_)?"
+ end_datetime_pattern + ) # Find dates using the regex - start_date, end_date = _get_from_pattern(datetime_pattern, - date_range_pattern, stem, - 'datetime') + start_date, end_date = _get_from_pattern( + datetime_pattern, date_range_pattern, stem, "datetime" + ) # As final resort, try to get the dates from the file contents - if ((start_date is None or end_date is None) - and isinstance(file, (str, Path)) and Path(file).exists()): + if ( + (start_date is None or end_date is None) + and isinstance(file, (str, Path)) + and Path(file).exists() + ): logger.debug("Must load file %s for daterange ", file) cubes = iris.load(file) for cube in cubes: logger.debug(cube) try: - time = cube.coord('time') + time = cube.coord("time") except iris.exceptions.CoordinateNotFoundError: continue start_date = isodate.date_isoformat( - time.cell(0).point, format=isodate.isostrf.DATE_BAS_COMPLETE) + time.cell(0).point, format=isodate.isostrf.DATE_BAS_COMPLETE + ) end_date = isodate.date_isoformat( - time.cell(-1).point, format=isodate.isostrf.DATE_BAS_COMPLETE) + time.cell(-1).point, format=isodate.isostrf.DATE_BAS_COMPLETE + ) break if start_date is None or end_date is None: raise ValueError( f"File {file} datetimes do not match a recognized pattern and " - f"time coordinate can not be read from the file") + f"time coordinate can not be read from the file" + ) # Remove potential '-' characters from datetimes - start_date = start_date.replace('-', '') - end_date = end_date.replace('-', '') + start_date = start_date.replace("-", "") + end_date = end_date.replace("-", "") return start_date, end_date def _get_start_end_year( - file: str | Path | LocalFile | ESGFFile) -> tuple[int, int]: + file: str | Path | LocalFile | ESGFFile, +) -> tuple[int, int]: """Get the start and end year as int from a file name. See :func:`_get_start_end_date`. 
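
# --- Illustrative aside, not part of the patch ------------------------------
# The hunks above only re-wrap _get_start_end_date; its behaviour is
# unchanged. A minimal standalone sketch of the filename parsing it performs,
# using a deliberately simplified pattern (assumption: two 4- to 8-digit
# dates joined by '-', '_', or '_cat_' at the end of the stem) instead of the
# full named-group regex:
import re


def sketch_start_end(stem: str):
    """Return (start, end) date strings found in a file stem, or None."""
    match = re.search(r"([0-9]{4,8})[-_](?:cat_)?([0-9]{4,8})$", stem)
    return match.groups() if match else None


assert sketch_start_end(
    "tas_Amon_EC-Earth3_historical_r1i1p1f1_gr_185001-201412"
) == ("185001", "201412")
# -----------------------------------------------------------------------------
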
@@ -187,26 +202,26 @@ def _dates_to_timerange(start_date, end_date): end_date = str(end_date) # Pad years with 0s if not wildcard or relative time range - if start_date != '*' and not start_date.startswith('P'): + if start_date != "*" and not start_date.startswith("P"): start_date = start_date.zfill(4) - if end_date != '*' and not end_date.startswith('P'): + if end_date != "*" and not end_date.startswith("P"): end_date = end_date.zfill(4) - return f'{start_date}/{end_date}' + return f"{start_date}/{end_date}" def _replace_years_with_timerange(variable): """Set `timerange` tag from tags `start_year` and `end_year`.""" - start_year = variable.get('start_year') - end_year = variable.get('end_year') + start_year = variable.get("start_year") + end_year = variable.get("end_year") if start_year and end_year: - variable['timerange'] = _dates_to_timerange(start_year, end_year) + variable["timerange"] = _dates_to_timerange(start_year, end_year) elif start_year: - variable['timerange'] = _dates_to_timerange(start_year, start_year) + variable["timerange"] = _dates_to_timerange(start_year, start_year) elif end_year: - variable['timerange'] = _dates_to_timerange(end_year, end_year) - variable.pop('start_year', None) - variable.pop('end_year', None) + variable["timerange"] = _dates_to_timerange(end_year, end_year) + variable.pop("start_year", None) + variable.pop("end_year", None) def _parse_period(timerange): @@ -219,40 +234,44 @@ def _parse_period(timerange): start_date = None end_date = None time_format = None - datetime_format = (isodate.DATE_BAS_COMPLETE + 'T' + - isodate.TIME_BAS_COMPLETE) - if timerange.split('/')[0].startswith('P'): + datetime_format = ( + isodate.DATE_BAS_COMPLETE + "T" + isodate.TIME_BAS_COMPLETE + ) + if timerange.split("/")[0].startswith("P"): try: - end_date = isodate.parse_datetime(timerange.split('/')[1]) + end_date = isodate.parse_datetime(timerange.split("/")[1]) time_format = datetime_format except isodate.ISO8601Error: - end_date = isodate.parse_date(timerange.split('/')[1]) + end_date = isodate.parse_date(timerange.split("/")[1]) time_format = isodate.DATE_BAS_COMPLETE - delta = isodate.parse_duration(timerange.split('/')[0]) + delta = isodate.parse_duration(timerange.split("/")[0]) start_date = end_date - delta - elif timerange.split('/')[1].startswith('P'): + elif timerange.split("/")[1].startswith("P"): try: - start_date = isodate.parse_datetime(timerange.split('/')[0]) + start_date = isodate.parse_datetime(timerange.split("/")[0]) time_format = datetime_format except isodate.ISO8601Error: - start_date = isodate.parse_date(timerange.split('/')[0]) + start_date = isodate.parse_date(timerange.split("/")[0]) time_format = isodate.DATE_BAS_COMPLETE - delta = isodate.parse_duration(timerange.split('/')[1]) + delta = isodate.parse_duration(timerange.split("/")[1]) end_date = start_date + delta if time_format == datetime_format: start_date = str( - isodate.datetime_isoformat(start_date, format=datetime_format)) + isodate.datetime_isoformat(start_date, format=datetime_format) + ) end_date = str( - isodate.datetime_isoformat(end_date, format=datetime_format)) + isodate.datetime_isoformat(end_date, format=datetime_format) + ) elif time_format == isodate.DATE_BAS_COMPLETE: - start_date = str(isodate.date_isoformat(start_date, - format=time_format)) + start_date = str( + isodate.date_isoformat(start_date, format=time_format) + ) end_date = str(isodate.date_isoformat(end_date, format=time_format)) if start_date is None and end_date is None: - start_date = 
timerange.split('/')[0] - end_date = timerange.split('/')[1] + start_date = timerange.split("/")[0] + end_date = timerange.split("/")[1] return start_date, end_date @@ -271,12 +290,12 @@ def _truncate_dates(date, file_date): zeros (e.g., use ``date='0100'`` and ``file_date='199901'`` for a correct comparison). """ - date = re.sub("[^0-9]", '', date) - file_date = re.sub("[^0-9]", '', file_date) + date = re.sub("[^0-9]", "", date) + file_date = re.sub("[^0-9]", "", file_date) if len(date) < len(file_date): - file_date = file_date[0:len(date)] + file_date = file_date[0 : len(date)] elif len(date) > len(file_date): - date = date[0:len(file_date)] + date = date[0 : len(file_date)] return int(date), int(file_date) @@ -290,7 +309,7 @@ def _select_files(filenames, timerange): Otherwise, the file selection occurs taking into account the time resolution of the file. """ - if '*' in timerange: + if "*" in timerange: # TODO: support * combined with a period return filenames @@ -314,19 +333,22 @@ def _replace_tags( ) -> list[Path]: """Replace tags in the config-developer's file with actual values.""" if isinstance(paths, str): - pathset = set((paths.strip('/'), )) + pathset = set((paths.strip("/"),)) else: - pathset = set(path.strip('/') for path in paths) + pathset = set(path.strip("/") for path in paths) tlist: set[str] = set() for path in pathset: - tlist = tlist.union(re.findall(r'{([^}]*)}', path)) - if 'sub_experiment' in variable: + tlist = tlist.union(re.findall(r"{([^}]*)}", path)) + if "sub_experiment" in variable: new_paths: set[str] = set() for path in pathset: new_paths.update( - (re.sub(r'(\b{ensemble}\b)', r'{sub_experiment}-\1', path), - re.sub(r'({ensemble})', r'{sub_experiment}-\1', path))) - tlist.add('sub_experiment') + ( + re.sub(r"(\b{ensemble}\b)", r"{sub_experiment}-\1", path), + re.sub(r"({ensemble})", r"{sub_experiment}-\1", path), + ) + ) + tlist.add("sub_experiment") pathset = new_paths for tag in tlist: @@ -335,8 +357,8 @@ def _replace_tags( if tag in variable: replacewith = variable[tag] - elif tag == 'version': - replacewith = '*' + elif tag == "version": + replacewith = "*" else: raise RecipeError( f"Dataset key '{tag}' must be specified for {variable}, check " @@ -355,17 +377,17 @@ def _replace_tag(paths, tag, replacewith): result.extend(_replace_tag(paths, tag, item)) else: text = _apply_caps(str(replacewith), lower, upper) - result.extend(p.replace('{' + tag + '}', text) for p in paths) + result.extend(p.replace("{" + tag + "}", text) for p in paths) return list(set(result)) def _get_caps_options(tag): lower = False upper = False - if tag.endswith('.lower'): + if tag.endswith(".lower"): lower = True tag = tag[0:-6] - elif tag.endswith('.upper'): + elif tag.endswith(".upper"): upper = True tag = tag[0:-6] return tag, lower, upper @@ -393,8 +415,10 @@ def _select_drs(input_type: str, project: str, structure: str) -> list[str]: return value raise KeyError( - 'drs {} for {} project not specified in config-developer file'.format( - structure, project)) + "drs {} for {} project not specified in config-developer file".format( + structure, project + ) + ) @dataclass(order=True, frozen=True) @@ -409,8 +433,11 @@ def get_glob_patterns(self, **facets) -> list[Path]: """Compose the globs that will be used to look for files.""" dirname_globs = _replace_tags(self.dirname_template, facets) filename_globs = _replace_tags(self.filename_template, facets) - return sorted(self.rootpath / d / f for d in dirname_globs - for f in filename_globs) + return sorted( + self.rootpath / d / f + 
for d in dirname_globs + for f in filename_globs + ) def find_files(self, **facets) -> list[LocalFile]: """Find files.""" @@ -425,8 +452,8 @@ def find_files(self, **facets) -> list[LocalFile]: files.append(file) files.sort() # sorting makes it easier to see what was found - if 'timerange' in facets: - files = _select_files(files, facets['timerange']) + if "timerange" in facets: + files = _select_files(files, facets["timerange"]) return files @@ -435,51 +462,55 @@ def find_files(self, **facets) -> list[LocalFile]: def _get_data_sources(project: str) -> list[DataSource]: """Get a list of data sources.""" - rootpaths = CFG['rootpath'] - for key in (project, 'default'): + rootpaths = CFG["rootpath"] + for key in (project, "default"): if key in rootpaths: paths = rootpaths[key] nonexistent = tuple(p for p in paths if not os.path.exists(p)) if nonexistent and (key, nonexistent) not in _ROOTPATH_WARNED: logger.warning( "'%s' rootpaths '%s' set in config-user.yml do not exist", - key, ', '.join(str(p) for p in nonexistent)) + key, + ", ".join(str(p) for p in nonexistent), + ) _ROOTPATH_WARNED.add((key, nonexistent)) if isinstance(paths, list): - structure = CFG['drs'].get(project, 'default') + structure = CFG["drs"].get(project, "default") paths = {p: structure for p in paths} sources: list[DataSource] = [] for path, structure in paths.items(): - dir_templates = _select_drs('input_dir', project, structure) - file_templates = _select_drs('input_file', project, structure) + dir_templates = _select_drs("input_dir", project, structure) + file_templates = _select_drs("input_file", project, structure) sources.extend( DataSource(path, d, f) - for d in dir_templates for f in file_templates + for d in dir_templates + for f in file_templates ) return sources raise KeyError( f"No '{project}' or 'default' path specified under 'rootpath' in " - "the user configuration.") + "the user configuration." 
+ ) def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: """Return the full path to the output (preprocessed) file.""" - cfg = get_project_config(variable['project']) + cfg = get_project_config(variable["project"]) # Join different experiment names - if isinstance(variable.get('exp'), (list, tuple)): + if isinstance(variable.get("exp"), (list, tuple)): variable = dict(variable) - variable['exp'] = '-'.join(variable['exp']) - outfile = _replace_tags(cfg['output_file'], variable)[0] - if 'timerange' in variable: - timerange = variable['timerange'].replace('/', '-') - outfile = Path(f'{outfile}_{timerange}') + variable["exp"] = "-".join(variable["exp"]) + outfile = _replace_tags(cfg["output_file"], variable)[0] + if "timerange" in variable: + timerange = variable["timerange"].replace("/", "-") + outfile = Path(f"{outfile}_{timerange}") outfile = Path(f"{outfile}.nc") return Path( preproc_dir, - variable.get('diagnostic', ''), - variable.get('variable_group', ''), + variable.get("diagnostic", ""), + variable.get("variable_group", ""), outfile, ) @@ -487,8 +518,13 @@ def _get_output_file(variable: dict[str, Any], preproc_dir: Path) -> Path: def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: """Get ensemble/multi-model filename depending on settings.""" relevant_keys = [ - 'project', 'dataset', 'exp', 'ensemble_statistics', - 'multi_model_statistics', 'mip', 'short_name' + "project", + "dataset", + "exp", + "ensemble_statistics", + "multi_model_statistics", + "mip", + "short_name", ] filename_segments = [] @@ -496,8 +532,8 @@ def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: if key in attributes: attribute = attributes[key] if isinstance(attribute, (list, tuple)): - attribute = '-'.join(attribute) - filename_segments.extend(attribute.split('_')) + attribute = "-".join(attribute) + filename_segments.extend(attribute.split("_")) # Remove duplicate segments: filename_segments = list(dict.fromkeys(filename_segments)) @@ -507,9 +543,9 @@ def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: outfile = Path( preproc_dir, - attributes['diagnostic'], - attributes['variable_group'], - '_'.join(filename_segments), + attributes["diagnostic"], + attributes["variable_group"], + "_".join(filename_segments), ) return outfile @@ -518,14 +554,13 @@ def _get_multiproduct_filename(attributes: dict, preproc_dir: Path) -> Path: def _path2facets(path: Path, drs: str) -> dict[str, str]: """Extract facets from a path using a DRS like '{facet1}/{facet2}'.""" keys = [] - for key in re.findall(r'{(.*?)}[^-]', f'{drs} '): - key = key.split('.')[0] # Remove trailing .lower and .upper + for key in re.findall(r"{(.*?)}[^-]", f"{drs} "): + key = key.split(".")[0] # Remove trailing .lower and .upper keys.append(key) start, end = -len(keys) - 1, -1 values = path.parts[start:end] facets = { - key: values[idx] - for idx, key in enumerate(keys) if "{" not in key + key: values[idx] for idx, key in enumerate(keys) if "{" not in key } if len(facets) != len(keys): @@ -534,13 +569,14 @@ def _path2facets(path: Path, drs: str) -> dict[str, str]: for idx, key in enumerate(keys): if key not in facets: facet1, facet2 = key.split("}-{") - facets[facet2] = values[idx].replace(f'{facets[facet1]}-', '') + facets[facet2] = values[idx].replace(f"{facets[facet1]}-", "") return facets def _filter_versions_called_latest( - files: list['LocalFile']) -> list['LocalFile']: + files: list["LocalFile"], +) -> list["LocalFile"]: """Filter out versions called 
'latest' if they are duplicates. On compute clusters it is usual to have a symbolic link to the @@ -549,26 +585,30 @@ def _filter_versions_called_latest( """ resolved_valid_versions = { f.resolve(strict=False) - for f in files if f.facets.get('version') != 'latest' + for f in files + if f.facets.get("version") != "latest" } return [ - f for f in files if f.facets.get('version') != 'latest' or f.resolve( - strict=False) not in resolved_valid_versions + f + for f in files + if f.facets.get("version") != "latest" + or f.resolve(strict=False) not in resolved_valid_versions ] -def _select_latest_version(files: list['LocalFile']) -> list['LocalFile']: +def _select_latest_version(files: list["LocalFile"]) -> list["LocalFile"]: """Select only the latest version of files.""" def filename(file): return file.name def version(file): - return file.facets.get('version', '') + return file.facets.get("version", "") result = [] - for _, group in itertools.groupby(sorted(files, key=filename), - key=filename): + for _, group in itertools.groupby( + sorted(files, key=filename), key=filename + ): duplicates = sorted(group, key=version) latest = duplicates[-1] result.append(latest) @@ -640,22 +680,22 @@ def find_files( The files that were found. """ facets = dict(facets) - if 'original_short_name' in facets: - facets['short_name'] = facets['original_short_name'] + if "original_short_name" in facets: + facets["short_name"] = facets["original_short_name"] files = [] filter_latest = False - data_sources = _get_data_sources(facets['project']) # type: ignore + data_sources = _get_data_sources(facets["project"]) # type: ignore for data_source in data_sources: for file in data_source.find_files(**facets): - if file.facets.get('version') == 'latest': + if file.facets.get("version") == "latest": filter_latest = True files.append(file) if filter_latest: files = _filter_versions_called_latest(files) - if 'version' not in facets: + if "version" not in facets: files = _select_latest_version(files) files.sort() # sorting makes it easier to see what was found @@ -680,7 +720,7 @@ def facets(self) -> Facets: When using :func:`find_files`, facets are read from the directory structure. Facets stored in filenames are not yet supported. """ - if not hasattr(self, '_facets'): + if not hasattr(self, "_facets"): self._facets: Facets = {} return self._facets diff --git a/tests/unit/cmor/test_cmor_check.py b/tests/unit/cmor/test_cmor_check.py index f4531a9a76..30a23ade0f 100644 --- a/tests/unit/cmor/test_cmor_check.py +++ b/tests/unit/cmor/test_cmor_check.py @@ -29,29 +29,29 @@ class VariableInfoMock: """Mock for the variables definition.""" def __init__(self): - self.table_type = 'CMIP5' - self.short_name = 'short_name' - self.standard_name = 'age_of_sea_ice' # Iris don't accept fakes ... - self.long_name = 'Long Name' - self.units = 'years' # ... nor in the units - self.valid_min = '0' - self.valid_max = '100' - self.frequency = 'day' - self.positive = '' - - generic_level = CoordinateInfoMock('depth') + self.table_type = "CMIP5" + self.short_name = "short_name" + self.standard_name = "age_of_sea_ice" # Iris don't accept fakes ... + self.long_name = "Long Name" + self.units = "years" # ... 
nor in the units
+        self.valid_min = "0"
+        self.valid_max = "100"
+        self.frequency = "day"
+        self.positive = ""
+
+        generic_level = CoordinateInfoMock("depth")
         generic_level.generic_level = True
-        generic_level.axis = 'Z'
+        generic_level.axis = "Z"
 
-        requested = CoordinateInfoMock('air_pressure')
+        requested = CoordinateInfoMock("air_pressure")
         requested.requested = [str(number) for number in range(20)]
 
         self.coordinates = {
-            'time': CoordinateInfoMock('time'),
-            'lat': CoordinateInfoMock('lat'),
-            'lon': CoordinateInfoMock('lon'),
-            'air_pressure': requested,
-            'depth': generic_level,
+            "time": CoordinateInfoMock("time"),
+            "lat": CoordinateInfoMock("lat"),
+            "lon": CoordinateInfoMock("lon"),
+            "air_pressure": requested,
+            "depth": generic_level,
         }
 
 
@@ -64,7 +64,7 @@ def __init__(self, name):
         self.axis = ""
         self.value = ""
 
-        standard_names = {'lat': 'latitude', 'lon': 'longitude'}
+        standard_names = {"lat": "latitude", "lon": "longitude"}
         if name in standard_names:
             self.standard_name = standard_names[name]
         else:
@@ -74,9 +74,9 @@ def __init__(self, name):
         self.var_name = self.name
 
         units = {
-            'lat': 'degrees_north',
-            'lon': 'degrees_east',
-            'time': 'days since 1950-01-01 00:00:00'
+            "lat": "degrees_north",
+            "lon": "degrees_east",
+            "time": "days since 1950-01-01 00:00:00",
         }
         if name in units:
             self.units = units[name]
@@ -89,7 +89,7 @@ def __init__(self, name):
         self.generic_lev_coords = {}
         self.generic_lev_name = ""
 
-        valid_limits = {'lat': ('-90', '90'), 'lon': ('0', '360')}
+        valid_limits = {"lat": ("-90", "90"), "lon": ("0", "360")}
         if name in valid_limits:
             self.valid_min = valid_limits[name][0]
             self.valid_max = valid_limits[name][1]
@@ -110,70 +110,77 @@ def test_report_error(self):
         """Test report error function."""
         checker = CMORCheck(self.cube, self.var_info)
         self.assertFalse(checker.has_errors())
-        checker.report_critical('New error: {}', 'something failed')
+        checker.report_critical("New error: {}", "something failed")
         self.assertTrue(checker.has_errors())
 
     def test_fail_on_error(self):
         """Test exception is raised if fail_on_error is activated."""
         checker = CMORCheck(self.cube, self.var_info, fail_on_error=True)
         with self.assertRaises(CMORCheckError):
-            checker.report_critical('New error: {}', 'something failed')
+            checker.report_critical("New error: {}", "something failed")
 
     def test_report_warning(self):
         """Test report warning function."""
         checker = CMORCheck(self.cube, self.var_info)
         self.assertFalse(checker.has_errors())
-        checker.report_warning('New error: {}', 'something failed')
+        checker.report_warning("New error: {}", "something failed")
         self.assertTrue(checker.has_warnings())
 
     def test_warning_fail_on_error(self):
         """Test report warning function with fail_on_error."""
         checker = CMORCheck(self.cube, self.var_info, fail_on_error=True)
-        with self.assertLogs(level='WARNING') as cm:
-            checker.report_warning('New error: {}', 'something failed')
+        with self.assertLogs(level="WARNING") as cm:
+            checker.report_warning("New error: {}", "something failed")
         self.assertEqual(
             cm.output,
-            ['WARNING:esmvalcore.cmor.check:New error: something failed', ]
+            [
+                "WARNING:esmvalcore.cmor.check:New error: something failed",
+            ],
         )
 
     def test_report_debug_message(self):
-        """"Test report debug message function"""
+        """Test report debug message function."""
         checker = CMORCheck(self.cube, self.var_info)
         self.assertFalse(checker.has_debug_messages())
-        checker.report_debug_message('New debug message')
+        checker.report_debug_message("New debug message")
         self.assertTrue(checker.has_debug_messages())
 
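
# --- Illustrative aside, not part of the patch ------------------------------
# The tests above pin down the reporting contract that the reformatted
# check.py preserves: report_critical(), report_warning() and
# report_debug_message() accumulate messages on the checker, and only
# fail_on_error=True escalates a critical report to an immediate
# CMORCheckError. A sketch of the calling pattern used throughout this suite
# (cube and var_info set up as in setUp()):
def run_cmor_checks(cube, var_info):
    checker = CMORCheck(cube, var_info)  # fail_on_error is off by default
    cube = checker.check_metadata()  # both check steps return the cube
    cube = checker.check_data()
    return cube, checker.has_errors(), checker.has_warnings()
# -----------------------------------------------------------------------------
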
def test_check(self): """Test checks succeeds for a good cube.""" self._check_cube() - def _check_cube(self, frequency=None, - check_level=CheckLevels.DEFAULT): + def _check_cube(self, frequency=None, check_level=CheckLevels.DEFAULT): """Apply checks to self.cube.""" + def checker(cube): return CMORCheck( cube, self.var_info, frequency=frequency, - check_level=check_level) + check_level=check_level, + ) self.cube = checker(self.cube).check_metadata() self.cube = checker(self.cube).check_data() - def _check_cube_metadata(self, frequency=None, - check_level=CheckLevels.DEFAULT): + def _check_cube_metadata( + self, frequency=None, check_level=CheckLevels.DEFAULT + ): """Apply checks to self.cube.""" + def checker(cube): return CMORCheck( cube, self.var_info, frequency=frequency, - check_level=check_level) + check_level=check_level, + ) self.cube = checker(self.cube).check_metadata() def test_check_with_custom_logger(self): """Test checks with custom logger.""" + def checker(cube): return CMORCheck(cube, self.var_info) @@ -182,86 +189,87 @@ def checker(cube): def test_check_with_month_number(self): """Test checks succeeds for a good cube with month number.""" - iris.coord_categorisation.add_month_number(self.cube, 'time') + iris.coord_categorisation.add_month_number(self.cube, "time") self._check_cube() def test_check_with_day_of_month(self): """Test checks succeeds for a good cube with day of month.""" - iris.coord_categorisation.add_day_of_month(self.cube, 'time') + iris.coord_categorisation.add_day_of_month(self.cube, "time") self._check_cube() def test_check_with_day_of_year(self): """Test checks succeeds for a good cube with day of year.""" - iris.coord_categorisation.add_day_of_year(self.cube, 'time') + iris.coord_categorisation.add_day_of_year(self.cube, "time") self._check_cube() def test_check_with_year(self): """Test checks succeeds for a good cube with year.""" - iris.coord_categorisation.add_year(self.cube, 'time') + iris.coord_categorisation.add_year(self.cube, "time") self._check_cube() def test_check_no_multiple_coords_same_stdname(self): """Test checks fails if two coords have the same standard_name.""" self.cube.add_aux_coord( iris.coords.AuxCoord( - np.reshape(np.linspace(-90, 90, num=20*20), (20, 20)), - var_name='bad_name', - standard_name='latitude', - units='degrees_north' + np.reshape(np.linspace(-90, 90, num=20 * 20), (20, 20)), + var_name="bad_name", + standard_name="latitude", + units="degrees_north", ), - (1, 2) + (1, 2), ) self._check_fails_in_metadata() def test_check_bad_standard_name(self): """Test check fails for a bad short_name.""" - self.cube.standard_name = 'wind_speed' + self.cube.standard_name = "wind_speed" self._check_fails_in_metadata() def test_check_bad_long_name(self): """Test check fails for a bad short_name.""" - self.cube.long_name = 'bad_name' + self.cube.long_name = "bad_name" self._check_fails_in_metadata() def test_check_bad_units(self): """Test check fails for bad units.""" - self.cube.units = 'days' + self.cube.units = "days" self._check_fails_in_metadata() def test_check_with_positive(self): """Check variable with positive attribute.""" - self.var_info.positive = 'up' + self.var_info.positive = "up" self.cube = self.get_cube(self.var_info) self._check_cube() def test_check_with_no_positive_cmip5(self): """Check CMIP5 variable with no positive attribute report warning.""" self.cube = self.get_cube(self.var_info) - self.var_info.positive = 'up' + self.var_info.positive = "up" self._check_warnings_on_metadata() def 
test_check_with_no_positive_cmip6(self): """Check CMIP6 variable with no positive attribute report warning.""" - self.var_info.positive = 'up' - self.var_info.table_type = 'CMIP6' + self.var_info.positive = "up" + self.var_info.table_type = "CMIP6" self._check_warnings_on_metadata() def test_invalid_rank(self): """Test check fails in metadata step when rank is not correct.""" - lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) - self.cube.remove_coord('latitude') - self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + lat = iris.coords.AuxCoord.from_coord(self.cube.coord("latitude")) + self.cube.remove_coord("latitude") + self.cube.add_aux_coord(lat, self.cube.coord_dims("longitude")) self._check_fails_in_metadata() def test_rank_with_aux_coords(self): """Check succeeds even if a required coordinate is an aux coord.""" - iris.util.demote_dim_coord_to_aux_coord(self.cube, 'latitude') + iris.util.demote_dim_coord_to_aux_coord(self.cube, "latitude") self._check_cube() def test_rank_with_scalar_coords(self): """Check succeeds even if a required coordinate is a scalar coord.""" self.cube = self.cube.extract( - iris.Constraint(time=self.cube.coord('time').cell(0))) + iris.Constraint(time=self.cube.coord("time").cell(0)) + ) self._check_cube() def test_rank_unstructured_grid(self): @@ -272,15 +280,16 @@ def test_rank_unstructured_grid(self): def test_bad_generic_level(self): """Test check fails in metadata if generic level coord has wrong var_name.""" - depth_coord = CoordinateInfoMock('depth') - depth_coord.axis = 'Z' - depth_coord.generic_lev_name = 'olevel' - depth_coord.out_name = 'lev' - depth_coord.name = 'depth_coord' - depth_coord.long_name = 'ocean depth coordinate' - self.var_info.coordinates['depth'].generic_lev_coords = { - 'depth_coord': depth_coord} - self.var_info.coordinates['depth'].out_name = "" + depth_coord = CoordinateInfoMock("depth") + depth_coord.axis = "Z" + depth_coord.generic_lev_name = "olevel" + depth_coord.out_name = "lev" + depth_coord.name = "depth_coord" + depth_coord.long_name = "ocean depth coordinate" + self.var_info.coordinates["depth"].generic_lev_coords = { + "depth_coord": depth_coord + } + self.var_info.coordinates["depth"].out_name = "" self._check_fails_in_metadata() def test_valid_generic_level(self): @@ -301,51 +310,52 @@ def test_valid_generic_level_automatic_fixes(self): def test_invalid_generic_level(self): """Test invalid generic level coordinate.""" self._setup_generic_level_var() - self.cube.remove_coord('atmosphere_sigma_coordinate') + self.cube.remove_coord("atmosphere_sigma_coordinate") self._check_fails_in_metadata() def test_generic_level_alternative_cmip3(self): """Test valid alternative for generic level coords (CMIP3).""" - self.var_info.table_type = 'CMIP3' + self.var_info.table_type = "CMIP3" self._setup_generic_level_var() - self.var_info.coordinates['zlevel'] = self.var_info.coordinates.pop( - 'alevel') + self.var_info.coordinates["zlevel"] = self.var_info.coordinates.pop( + "alevel" + ) self._add_plev_to_cube() self._check_warnings_on_metadata() def test_generic_level_alternative_cmip5(self): """Test valid alternative for generic level coords (CMIP5).""" - self.var_info.table_type = 'CMIP5' + self.var_info.table_type = "CMIP5" self._setup_generic_level_var() self._add_plev_to_cube() self._check_warnings_on_metadata() def test_generic_level_alternative_cmip6(self): """Test valid alternative for generic level coords (CMIP6).""" - self.var_info.table_type = 'CMIP6' + self.var_info.table_type = "CMIP6" 
self._setup_generic_level_var() self._add_plev_to_cube() self._check_warnings_on_metadata() def test_generic_level_alternative_obs4mips(self): """Test valid alternative for generic level coords (obs4MIPs).""" - self.var_info.table_type = 'obs4MIPs' + self.var_info.table_type = "obs4MIPs" self._setup_generic_level_var() self._add_plev_to_cube() self._check_warnings_on_metadata() def test_generic_level_invalid_alternative(self): """Test invalid alternative for generic level coords.""" - self.var_info.table_type = 'CMIP6' + self.var_info.table_type = "CMIP6" self._setup_generic_level_var() self._add_plev_to_cube() - self.cube.coord('air_pressure').standard_name = 'altitude' + self.cube.coord("air_pressure").standard_name = "altitude" self._check_fails_in_metadata() def test_check_bad_var_standard_name_strict_flag(self): """Test check fails for a bad variable standard_name with --cmor-check strict.""" - self.cube.standard_name = 'wind_speed' + self.cube.standard_name = "wind_speed" self._check_fails_in_metadata() def test_check_bad_var_long_name_strict_flag(self): @@ -366,54 +376,54 @@ def test_check_bad_attributes_strict_flag(self): self.var_info.standard_name = "surface_upward_latent_heat_flux" self.var_info.positive = "up" self.cube = self.get_cube(self.var_info) - self.cube.attributes['positive'] = "Wrong attribute" + self.cube.attributes["positive"] = "Wrong attribute" self._check_fails_in_metadata() def test_check_bad_rank_strict_flag(self): """Test check fails for a bad variable rank with --cmor-check strict.""" - lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) - self.cube.remove_coord('latitude') - self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + lat = iris.coords.AuxCoord.from_coord(self.cube.coord("latitude")) + self.cube.remove_coord("latitude") + self.cube.add_aux_coord(lat, self.cube.coord_dims("longitude")) self._check_fails_in_metadata() def test_check_bad_coord_var_name_strict_flag(self): """Test check fails for bad coord var_name with --cmor-check strict""" - self.var_info.table_type = 'CMIP5' - self.cube.coord('longitude').var_name = 'bad_name' + self.var_info.table_type = "CMIP5" + self.cube.coord("longitude").var_name = "bad_name" self._check_fails_in_metadata() def test_check_missing_lon_strict_flag(self): """Test check fails for missing longitude with --cmor-check strict""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('longitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("longitude") self._check_fails_in_metadata() def test_check_missing_lat_strict_flag(self): """Test check fails for missing latitude with --cmor-check strict""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('latitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("latitude") self._check_fails_in_metadata() def test_check_missing_time_strict_flag(self): """Test check fails for missing time with --cmor-check strict""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('time') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("time") self._check_fails_in_metadata() def test_check_missing_coord_strict_flag(self): """Test check fails for missing coord other than lat and lon - with --cmor-check strict""" + with --cmor-check strict""" self.var_info.coordinates.update( - {'height2m': CoordinateInfoMock('height2m')} + {"height2m": CoordinateInfoMock("height2m")} ) self._check_fails_in_metadata() def test_check_bad_var_standard_name_relaxed_flag(self): """Test check reports warning for a 
bad variable standard_name with --cmor-check relaxed.""" - self.cube.standard_name = 'wind_speed' + self.cube.standard_name = "wind_speed" self._check_warnings_on_metadata(check_level=CheckLevels.RELAXED) def test_check_bad_var_long_name_relaxed_flag(self): @@ -434,54 +444,54 @@ def test_check_bad_attributes_relaxed_flag(self): self.var_info.standard_name = "surface_upward_latent_heat_flux" self.var_info.positive = "up" self.cube = self.get_cube(self.var_info) - self.cube.attributes['positive'] = "Wrong attribute" + self.cube.attributes["positive"] = "Wrong attribute" self._check_warnings_on_metadata(check_level=CheckLevels.RELAXED) def test_check_bad_rank_relaxed_flag(self): """Test check report warnings for a bad variable rank with --cmor-check relaxed.""" - lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) - self.cube.remove_coord('latitude') - self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + lat = iris.coords.AuxCoord.from_coord(self.cube.coord("latitude")) + self.cube.remove_coord("latitude") + self.cube.add_aux_coord(lat, self.cube.coord_dims("longitude")) self._check_warnings_on_metadata(check_level=CheckLevels.RELAXED) def test_check_bad_coord_standard_name_relaxed_flag(self): """Test check reports warning for bad coord var_name with --cmor-check relaxed""" - self.var_info.table_type = 'CMIP5' - self.cube.coord('longitude').var_name = 'bad_name' + self.var_info.table_type = "CMIP5" + self.cube.coord("longitude").var_name = "bad_name" self._check_warnings_on_metadata(check_level=CheckLevels.RELAXED) def test_check_missing_lon_relaxed_flag(self): """Test check fails for missing longitude with --cmor-check relaxed""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('longitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("longitude") self._check_fails_in_metadata(check_level=CheckLevels.RELAXED) def test_check_missing_lat_relaxed_flag(self): """Test check fails for missing latitude with --cmor-check relaxed""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('latitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("latitude") self._check_fails_in_metadata(check_level=CheckLevels.RELAXED) def test_check_missing_time_relaxed_flag(self): """Test check fails for missing latitude with --cmor-check relaxed""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('time') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("time") self._check_fails_in_metadata(check_level=CheckLevels.RELAXED) def test_check_missing_coord_relaxed_flag(self): """Test check reports warning for missing coord other than lat and lon with --cmor-check relaxed""" self.var_info.coordinates.update( - {'height2m': CoordinateInfoMock('height2m')} + {"height2m": CoordinateInfoMock("height2m")} ) self._check_warnings_on_metadata(check_level=CheckLevels.RELAXED) def test_check_bad_var_standard_name_none_flag(self): """Test check reports warning for a bad variable standard_name with --cmor-check ignore.""" - self.cube.standard_name = 'wind_speed' + self.cube.standard_name = "wind_speed" self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_bad_var_long_name_none_flag(self): @@ -502,83 +512,84 @@ def test_check_bad_attributes_none_flag(self): self.var_info.standard_name = "surface_upward_latent_heat_flux" self.var_info.positive = "up" self.cube = self.get_cube(self.var_info) - self.cube.attributes['positive'] = "Wrong attribute" + self.cube.attributes["positive"] = "Wrong attribute" 
self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_bad_rank_none_flag(self): """Test check reports warning for a bad variable rank with --cmor-check ignore.""" - lat = iris.coords.AuxCoord.from_coord(self.cube.coord('latitude')) - self.cube.remove_coord('latitude') - self.cube.add_aux_coord(lat, self.cube.coord_dims('longitude')) + lat = iris.coords.AuxCoord.from_coord(self.cube.coord("latitude")) + self.cube.remove_coord("latitude") + self.cube.add_aux_coord(lat, self.cube.coord_dims("longitude")) self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_bad_coord_standard_name_none_flag(self): """Test check reports warning for bad coord var_name with --cmor-check ignore.""" - self.var_info.table_type = 'CMIP5' - self.cube.coord('longitude').var_name = 'bad_name' + self.var_info.table_type = "CMIP5" + self.cube.coord("longitude").var_name = "bad_name" self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_missing_lon_none_flag(self): """Test check reports warning for missing longitude with --cmor-check ignore""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('longitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("longitude") self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_missing_lat_none_flag(self): """Test check reports warning for missing latitude with --cmor-check ignore""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('latitude') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("latitude") self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_missing_time_none_flag(self): """Test check reports warning for missing time with --cmor-check ignore""" - self.var_info.table_type = 'CMIP5' - self.cube.remove_coord('time') + self.var_info.table_type = "CMIP5" + self.cube.remove_coord("time") self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_missing_coord_none_flag(self): """Test check reports warning for missing coord other than lat, lon and time with --cmor-check ignore""" self.var_info.coordinates.update( - {'height2m': CoordinateInfoMock('height2m')} + {"height2m": CoordinateInfoMock("height2m")} ) self._check_warnings_on_metadata(check_level=CheckLevels.IGNORE) def test_check_lazy(self): """Test checker does not realise data or aux_coords.""" self.cube.data = self.cube.lazy_data() - self.cube.remove_coord('latitude') - self.cube.remove_coord('longitude') + self.cube.remove_coord("latitude") + self.cube.remove_coord("longitude") self.cube.add_aux_coord( iris.coords.AuxCoord( - da.reshape(da.linspace(-90, 90, num=20*20), (20, 20)), - var_name='lat', - standard_name='latitude', - units='degrees_north' + da.reshape(da.linspace(-90, 90, num=20 * 20), (20, 20)), + var_name="lat", + standard_name="latitude", + units="degrees_north", ), - (1, 2) + (1, 2), ) self.cube.add_aux_coord( iris.coords.AuxCoord( - da.reshape(da.linspace(0, 360, num=20*20), (20, 20)), - var_name='lon', - standard_name='longitude', - units='degrees_east' + da.reshape(da.linspace(0, 360, num=20 * 20), (20, 20)), + var_name="lon", + standard_name="longitude", + units="degrees_east", ), - (1, 2) + (1, 2), ) self._check_cube() - self.assertTrue(self.cube.coord('latitude').has_lazy_points()) - self.assertTrue(self.cube.coord('longitude').has_lazy_points()) + self.assertTrue(self.cube.coord("latitude").has_lazy_points()) + self.assertTrue(self.cube.coord("longitude").has_lazy_points()) 
self.assertTrue(self.cube.has_lazy_data()) - def _check_fails_in_metadata(self, frequency=None, - check_level=CheckLevels.DEFAULT): + def _check_fails_in_metadata( + self, frequency=None, check_level=CheckLevels.DEFAULT + ): checker = CMORCheck( self.cube, self.var_info, @@ -589,15 +600,14 @@ def _check_fails_in_metadata(self, frequency=None, checker.check_metadata() def _check_warnings_on_metadata(self, check_level=CheckLevels.DEFAULT): - checker = CMORCheck( - self.cube, self.var_info, check_level=check_level - ) + checker = CMORCheck(self.cube, self.var_info, check_level=check_level) checker.check_metadata() self.assertTrue(checker.has_warnings()) def _check_debug_messages_on_metadata(self): checker = CMORCheck( - self.cube, self.var_info, + self.cube, + self.var_info, ) checker.check_metadata() self.assertTrue(checker.has_debug_messages()) @@ -609,7 +619,7 @@ def test_non_requested(self): Check issue a warning if a values requested for a coordinate are not correct in the metadata step """ - coord = self.cube.coord('air_pressure') + coord = self.cube.coord("air_pressure") values = np.linspace(0, 40, len(coord.points)) self._update_coordinate_values(self.cube, coord, values) checker = CMORCheck(self.cube, self.var_info) @@ -623,29 +633,33 @@ def test_requested_str_values(self): Check issue a warning if a values requested for a coordinate are not correct in the metadata step """ - region_coord = CoordinateInfoMock('basin') - region_coord.standard_name = 'region' - region_coord.units = '' + region_coord = CoordinateInfoMock("basin") + region_coord.standard_name = "region" + region_coord.units = "" region_coord.requested = [ "atlantic_arctic_ocean", "indian_pacific_ocean", - "global_ocean" + "global_ocean", ] - self.var_info.coordinates['region'] = region_coord + self.var_info.coordinates["region"] = region_coord self.cube = self.get_cube(self.var_info) self._check_cube() def test_requested_non_1d(self): """Warning if requested values in non-1d cannot be checked.""" - coord = self.cube.coord('air_pressure') + coord = self.cube.coord("air_pressure") values = np.linspace(0, 40, len(coord.points)) values = np.broadcast_to(values, (20, 20)) bounds = np.moveaxis(np.stack((values - 0.01, values + 0.01)), 0, -1) new_plev_coord = iris.coords.AuxCoord( - values, bounds=bounds, var_name=coord.var_name, - standard_name=coord.standard_name, long_name=coord.long_name, - units=coord.units) - self.cube.remove_coord('air_pressure') + values, + bounds=bounds, + var_name=coord.var_name, + standard_name=coord.standard_name, + long_name=coord.long_name, + units=coord.units, + ) + self.cube.remove_coord("air_pressure") self.cube.add_aux_coord(new_plev_coord, (2, 3)) checker = CMORCheck(self.cube, self.var_info) checker.check_metadata() @@ -654,37 +668,35 @@ def test_requested_non_1d(self): def test_non_increasing(self): """Fail in metadata if increasing coordinate is decreasing.""" - coord = self.cube.coord('latitude') + coord = self.cube.coord("latitude") values = np.linspace( - coord.points[-1], - coord.points[0], - len(coord.points) + coord.points[-1], coord.points[0], len(coord.points) ) self._update_coordinate_values(self.cube, coord, values) self._check_fails_in_metadata() def test_non_decreasing(self): """Fail in metadata if decreasing coordinate is increasing.""" - self.var_info.coordinates['lat'].stored_direction = 'decreasing' + self.var_info.coordinates["lat"].stored_direction = "decreasing" self._check_fails_in_metadata() # TODO: remove in v2.12 def test_non_decreasing_automatic_fix_metadata(self): 
"""Automatic fix for decreasing coordinate.""" - self.var_info.coordinates['lat'].stored_direction = 'decreasing' + self.var_info.coordinates["lat"].stored_direction = "decreasing" checker = CMORCheck(self.cube, self.var_info, automatic_fixes=True) checker.check_metadata() # TODO: remove in v2.12 def test_non_decreasing_automatic_fix_data(self): """Automatic fix for decreasing coordinate.""" - self.var_info.coordinates['lat'].stored_direction = 'decreasing' + self.var_info.coordinates["lat"].stored_direction = "decreasing" checker = CMORCheck(self.cube, self.var_info, automatic_fixes=True) checker.check_data() def test_lat_non_monotonic(self): """Test fail for non monotonic latitude.""" - lat = self.cube.coord('latitude') + lat = self.cube.coord("latitude") points = np.array(lat.points) points[-1] = points[0] dims = self.cube.coord_dims(lat) @@ -695,43 +707,39 @@ def test_lat_non_monotonic(self): def test_not_bounds(self): """Warning if bounds are not available.""" - self.cube.coord('longitude').bounds = None + self.cube.coord("longitude").bounds = None self._check_warnings_on_metadata() - self.assertFalse(self.cube.coord('longitude').has_bounds()) + self.assertFalse(self.cube.coord("longitude").has_bounds()) def test_not_correct_lons(self): """Fail if longitudes are not correct in metadata step.""" - self.cube = self.cube.intersection(longitude=(-180., 180.)) + self.cube = self.cube.intersection(longitude=(-180.0, 180.0)) self._check_fails_in_metadata() def test_high_lons(self): """Test bad longitudes.""" - self.cube = self.cube.intersection(longitude=(720., 1080.)) + self.cube = self.cube.intersection(longitude=(720.0, 1080.0)) self._check_fails_in_metadata() def test_low_lons(self): """Test bad longitudes.""" - self.cube = self.cube.intersection(longitude=(-720., -360.)) + self.cube = self.cube.intersection(longitude=(-720.0, -360.0)) self._check_fails_in_metadata() def test_not_valid_min(self): """Fail if coordinate values below valid_min.""" - coord = self.cube.coord('latitude') + coord = self.cube.coord("latitude") values = np.linspace( - coord.points[0] - 1, - coord.points[-1], - len(coord.points) + coord.points[0] - 1, coord.points[-1], len(coord.points) ) self._update_coordinate_values(self.cube, coord, values) self._check_fails_in_metadata() def test_not_valid_max(self): """Fail if coordinate values above valid_max.""" - coord = self.cube.coord('latitude') + coord = self.cube.coord("latitude") values = np.linspace( - coord.points[0], - coord.points[-1] + 1, - len(coord.points) + coord.points[0], coord.points[-1] + 1, len(coord.points) ) self._update_coordinate_values(self.cube, coord, values) self._check_fails_in_metadata() @@ -745,22 +753,23 @@ def _update_coordinate_values(cube, coord, values): standard_name=coord.standard_name, long_name=coord.long_name, var_name=coord.var_name, - units=coord.units) + units=coord.units, + ) cube.add_dim_coord(new_coord, dimension) def test_bad_units(self): """Fail if coordinates have bad units.""" - self.cube.coord('latitude').units = 'degrees_n' + self.cube.coord("latitude").units = "degrees_n" self._check_fails_in_metadata() def test_non_convertible_units(self): """Test fail for incompatible coordinate units.""" - self.cube.coord('latitude').units = 'degC' + self.cube.coord("latitude").units = "degC" self._check_fails_in_metadata() def test_bad_time(self): """Fail if time have bad units.""" - self.cube.coord('time').units = 'days' + self.cube.coord("time").units = "days" self._check_fails_in_metadata() def test_wrong_parent_time_unit(self): 
@@ -772,17 +781,17 @@ def test_wrong_parent_time_unit(self): self.cube.attributes["branch_time_in_parent"] = 0.0 self.cube.attributes["branch_time_in_child"] = 0.0 self._check_warnings_on_metadata() - assert self.cube.attributes['branch_time_in_parent'] == 0. - assert self.cube.attributes['branch_time_in_child'] == 0 + assert self.cube.attributes["branch_time_in_parent"] == 0.0 + assert self.cube.attributes["branch_time_in_child"] == 0 def test_time_non_time_units(self): """Test fail for incompatible time units.""" - self.cube.coord('time').units = 'K' + self.cube.coord("time").units = "K" self._check_fails_in_metadata() def test_time_non_monotonic(self): """Test fail for non monotonic times.""" - time = self.cube.coord('time') + time = self.cube.coord("time") points = np.array(time.points) points[-1] = points[0] dims = self.cube.coord_dims(time) @@ -793,124 +802,124 @@ def test_time_non_monotonic(self): def test_bad_standard_name(self): """Fail if coordinates have bad standard names at metadata step.""" - self.cube.coord('time').standard_name = 'region' + self.cube.coord("time").standard_name = "region" self._check_fails_in_metadata() def test_bad_out_name_region_area_type(self): """Debug message if region/area_type AuxCoord has bad var_name at metadata.""" - region_coord = CoordinateInfoMock('basin') - region_coord.standard_name = 'region' - self.var_info.coordinates['region'] = region_coord + region_coord = CoordinateInfoMock("basin") + region_coord.standard_name = "region" + self.var_info.coordinates["region"] = region_coord self.cube = self.get_cube(self.var_info) - self.cube.coord("region").var_name = 'sector' + self.cube.coord("region").var_name = "sector" self._check_debug_messages_on_metadata() def test_bad_out_name_onedim_latitude(self): """Warning if onedimensional lat has bad var_name at metadata""" - self.var_info.table_type = 'CMIP6' - self.cube.coord('latitude').var_name = 'bad_name' + self.var_info.table_type = "CMIP6" + self.cube.coord("latitude").var_name = "bad_name" self._check_fails_in_metadata() def test_bad_out_name_onedim_longitude(self): """Warning if onedimensional lon has bad var_name at metadata""" - self.var_info.table_type = 'CMIP6' - self.cube.coord('longitude').var_name = 'bad_name' + self.var_info.table_type = "CMIP6" + self.cube.coord("longitude").var_name = "bad_name" self._check_fails_in_metadata() def test_bad_out_name_other(self): """Warning if general coordinate has bad var_name at metadata""" - self.var_info.table_type = 'CMIP6' - self.cube.coord('time').var_name = 'bad_name' + self.var_info.table_type = "CMIP6" + self.cube.coord("time").var_name = "bad_name" self._check_fails_in_metadata() def test_bad_out_name(self): """Fail if coordinates have bad short names at metadata step.""" - self.cube.coord('latitude').var_name = 'region' + self.cube.coord("latitude").var_name = "region" self._check_fails_in_metadata() def test_bad_data_units(self): """Fail if data has bad units at metadata step.""" - self.cube.units = 'hPa' + self.cube.units = "hPa" self._check_fails_in_metadata() def test_bad_positive(self): """Fail if positive value is incorrect at metadata step.""" - self.cube.attributes['positive'] = 'up' - self.var_info.positive = 'down' + self.cube.attributes["positive"] = "up" + self.var_info.positive = "down" self._check_fails_in_metadata() def test_bad_standard_name_genlevel(self): """Check if generic level has a different.""" - self.cube.coord('depth').standard_name = None + self.cube.coord("depth").standard_name = None self._check_cube() def 
test_frequency_month_not_same_day(self): """Fail at metadata if frequency (day) not matches data frequency.""" - self.cube = self.get_cube(self.var_info, frequency='mon') - time = self.cube.coord('time') + self.cube = self.get_cube(self.var_info, frequency="mon") + time = self.cube.coord("time") points = np.array(time.points) points[1] = points[1] + 12 dims = self.cube.coord_dims(time) self.cube.remove_coord(time) self.cube.add_dim_coord(time.copy(points), dims) - self._check_cube(frequency='mon') + self._check_cube(frequency="mon") def test_check_pt_freq(self): """Test checks succeeds for a good Pt frequency.""" - self.var_info.frequency = 'dayPt' + self.var_info.frequency = "dayPt" self._check_cube() def test_check_pt_lowercase_freq(self): """Test checks succeeds for a good Pt frequency.""" - self.var_info.frequency = 'daypt' + self.var_info.frequency = "daypt" self._check_cube() def test_bad_frequency_day(self): """Fail at metadata if frequency (day) not matches data frequency.""" - self.cube = self.get_cube(self.var_info, frequency='mon') - self._check_fails_in_metadata(frequency='day') + self.cube = self.get_cube(self.var_info, frequency="mon") + self._check_fails_in_metadata(frequency="day") def test_bad_frequency_subhr(self): """Fail at metadata if frequency (subhr) not matches data frequency.""" - self._check_fails_in_metadata(frequency='subhr') + self._check_fails_in_metadata(frequency="subhr") def test_bad_frequency_dec(self): """Fail at metadata if frequency (dec) not matches data frequency.""" - self._check_fails_in_metadata(frequency='d') + self._check_fails_in_metadata(frequency="d") def test_bad_frequency_yr(self): """Fail at metadata if frequency (yr) not matches data frequency.""" - self._check_fails_in_metadata(frequency='yr') + self._check_fails_in_metadata(frequency="yr") def test_bad_frequency_mon(self): """Fail at metadata if frequency (mon) not matches data frequency.""" - self._check_fails_in_metadata(frequency='mon') + self._check_fails_in_metadata(frequency="mon") def test_bad_frequency_hourly(self): """Fail at metadata if frequency (3hr) not matches data frequency.""" - self._check_fails_in_metadata(frequency='3hr') + self._check_fails_in_metadata(frequency="3hr") def test_frequency_not_supported(self): """Fail at metadata if frequency is not supported.""" - self._check_fails_in_metadata(frequency='wrong_freq') + self._check_fails_in_metadata(frequency="wrong_freq") def test_hr_mip_cordex(self): """Test hourly CORDEX tables are found.""" - checker = _get_cmor_checker('CORDEX', '3hr', 'tas', '3hr') - assert checker(self.cube)._cmor_var.short_name == 'tas' - assert checker(self.cube)._cmor_var.frequency == '3hr' + checker = _get_cmor_checker("CORDEX", "3hr", "tas", "3hr") + assert checker(self.cube)._cmor_var.short_name == "tas" + assert checker(self.cube)._cmor_var.frequency == "3hr" def test_custom_variable(self): - checker = _get_cmor_checker('OBS', 'Amon', 'uajet', 'mon') - assert checker(self.cube)._cmor_var.short_name == 'uajet' + checker = _get_cmor_checker("OBS", "Amon", "uajet", "mon") + assert checker(self.cube)._cmor_var.short_name == "uajet" assert checker(self.cube)._cmor_var.long_name == ( - 'Jet position expressed as latitude of maximum meridional wind ' - 'speed' + "Jet position expressed as latitude of maximum meridional wind " + "speed" ) - assert checker(self.cube)._cmor_var.units == 'degrees' + assert checker(self.cube)._cmor_var.units == "degrees" def _check_fails_on_data(self): checker = CMORCheck(self.cube, self.var_info) @@ -924,10 +933,12 
@@ -924,10 +933,12 @@ def _check_warnings_on_data(self):
         checker.check_data()
         self.assertTrue(checker.has_warnings())
 
-    def get_cube(self,
-                 var_info,
-                 set_time_units="days since 1850-1-1 00:00:00",
-                 frequency=None):
+    def get_cube(
+        self,
+        var_info,
+        set_time_units="days since 1850-1-1 00:00:00",
+        frequency=None,
+    ):
         """
         Create a cube based on a specification.
 
@@ -952,7 +963,8 @@ def get_cube(self,
             frequency = var_info.frequency
         for dim_spec in var_info.coordinates.values():
             coord = self._create_coord_from_spec(
-                dim_spec, set_time_units, frequency)
+                dim_spec, set_time_units, frequency
+            )
             if dim_spec.value:
                 scalar_coords.append(coord)
             else:
@@ -960,12 +972,13 @@ def get_cube(self,
                 index += 1
 
         valid_min, valid_max = self._get_valid_limits(var_info)
-        var_data = (np.ones(len(coords) * [20], 'f') *
-                    (valid_min + (valid_max - valid_min) / 2))
+        var_data = np.ones(len(coords) * [20], "f") * (
+            valid_min + (valid_max - valid_min) / 2
+        )
 
-        if var_info.units == 'psu':
+        if var_info.units == "psu":
             units = None
-            attributes = {'invalid_units': 'psu'}
+            attributes = {"invalid_units": "psu"}
         else:
             units = var_info.units
             attributes = None
@@ -979,7 +992,7 @@ def get_cube(self,
             attributes=attributes,
         )
         if var_info.positive:
-            cube.attributes['positive'] = var_info.positive
+            cube.attributes["positive"] = var_info.positive
 
         for coord, i in coords:
             if isinstance(coord, iris.coords.DimCoord):
@@ -998,86 +1011,108 @@ def _get_unstructed_grid_cube(self, n_bounds=2):
 
         cube = self.get_cube(self.var_info)
         cube = cube.extract(
-            iris.Constraint(latitude=cube.coord('latitude').points[0]))
-        lat_points = cube.coord('longitude').points
-        lat_points = lat_points / 3.0 - 50.
-        cube.remove_coord('latitude')
-        iris.util.demote_dim_coord_to_aux_coord(cube, 'longitude')
+            iris.Constraint(latitude=cube.coord("latitude").points[0])
+        )
+        lat_points = cube.coord("longitude").points
+        lat_points = lat_points / 3.0 - 50.0
+        cube.remove_coord("latitude")
+        iris.util.demote_dim_coord_to_aux_coord(cube, "longitude")
         lat_points = np.concatenate(
             (
-                cube.coord('longitude').points[0:10] / 4,
-                cube.coord('longitude').points[0:10] / 4
+                cube.coord("longitude").points[0:10] / 4,
+                cube.coord("longitude").points[0:10] / 4,
             ),
-            axis=0
+            axis=0,
         )
         lat_bounds = np.concatenate(
             (
-                cube.coord('longitude').bounds[0:10] / 4,
-                cube.coord('longitude').bounds[0:10] / 4
+                cube.coord("longitude").bounds[0:10] / 4,
+                cube.coord("longitude").bounds[0:10] / 4,
             ),
-            axis=0
+            axis=0,
         )
         new_lat = iris.coords.AuxCoord(
             points=lat_points,
             bounds=lat_bounds,
-            var_name='lat',
-            standard_name='latitude',
-            long_name='Latitude',
-            units='degrees_north',
+            var_name="lat",
+            standard_name="latitude",
+            long_name="Latitude",
+            units="degrees_north",
         )
         cube.add_aux_coord(new_lat, 1)
 
         # Add additional bound if desired
         if n_bounds == 3:
-            for coord_name in ('latitude', 'longitude'):
+            for coord_name in ("latitude", "longitude"):
                 coord = cube.coord(coord_name)
-                new_bounds = np.stack((
-                    coord.bounds[:, 0],
-                    0.5 * (coord.bounds[:, 0] + coord.bounds[:, 1]),
-                    coord.bounds[:, 1],
-                ))
+                new_bounds = np.stack(
+                    (
+                        coord.bounds[:, 0],
+                        0.5 * (coord.bounds[:, 0] + coord.bounds[:, 1]),
+                        coord.bounds[:, 1],
+                    )
+                )
                 coord.bounds = np.swapaxes(new_bounds, 0, 1)
 
         return cube
 
     def _setup_generic_level_var(self):
         """Set up var_info and cube with generic alevel coordinate."""
-        self.var_info.coordinates.pop('depth')
-        self.var_info.coordinates.pop('air_pressure')
+        self.var_info.coordinates.pop("depth")
+        self.var_info.coordinates.pop("air_pressure")
 
         # Create cube with sigma coordinate
-        sigma_coord = CoordinateInfoMock('standard_sigma')
-        sigma_coord.axis = 'Z'
-        sigma_coord.out_name = 'lev'
-        sigma_coord.standard_name = 'atmosphere_sigma_coordinate'
-        sigma_coord.long_name = 'sigma coordinate'
-        sigma_coord.generic_lev_name = 'alevel'
+        sigma_coord = CoordinateInfoMock("standard_sigma")
+        sigma_coord.axis = "Z"
+        sigma_coord.out_name = "lev"
+        sigma_coord.standard_name = "atmosphere_sigma_coordinate"
+        sigma_coord.long_name = "sigma coordinate"
+        sigma_coord.generic_lev_name = "alevel"
         var_info_for_cube = deepcopy(self.var_info)
-        var_info_for_cube.coordinates['standard_sigma'] = sigma_coord
+        var_info_for_cube.coordinates["standard_sigma"] = sigma_coord
         self.cube = self.get_cube(var_info_for_cube)
 
         # Create var_info with alevel coord that contains sigma coordinate in
         # generic_lev_coords dict (just like it is the case for the true CMOR
         # tables)
-        gen_lev_coord = CoordinateInfoMock('alevel')
+        gen_lev_coord = CoordinateInfoMock("alevel")
         gen_lev_coord.standard_name = None
         gen_lev_coord.generic_level = True
-        gen_lev_coord.generic_lev_coords = {'standard_sigma': sigma_coord}
-        self.var_info.coordinates['alevel'] = gen_lev_coord
+        gen_lev_coord.generic_lev_coords = {"standard_sigma": sigma_coord}
+        self.var_info.coordinates["alevel"] = gen_lev_coord
 
     def _add_plev_to_cube(self):
         """Add plev coordinate to cube."""
-        if self.cube.coords('atmosphere_sigma_coordinate'):
-            self.cube.remove_coord('atmosphere_sigma_coordinate')
-        plevs = [100000.0, 92500.0, 85000.0, 70000.0, 60000.0, 50000.0,
-                 40000.0, 30000.0, 25000.0, 20000.0, 15000.0, 10000.0, 7000.0,
-                 5000.0, 3000.0, 2000.0, 1000.0, 900.0, 800.0, 700.0]
+        if self.cube.coords("atmosphere_sigma_coordinate"):
+            self.cube.remove_coord("atmosphere_sigma_coordinate")
+        plevs = [
+            100000.0,
+            92500.0,
+            85000.0,
+            70000.0,
+            60000.0,
+            50000.0,
+            40000.0,
+            30000.0,
+            25000.0,
+            20000.0,
+            15000.0,
+            10000.0,
+            7000.0,
+            5000.0,
+            3000.0,
+            2000.0,
+            1000.0,
+            900.0,
+            800.0,
+            700.0,
+        ]
         coord = iris.coords.DimCoord(
             plevs,
-            var_name='plev',
-            standard_name='air_pressure',
-            units='Pa',
-            attributes={'positive': 'down'},
+            var_name="plev",
+            standard_name="air_pressure",
+            units="Pa",
+            attributes={"positive": "down"},
         )
         coord.guess_bounds()
         self.cube.add_dim_coord(coord, 3)
@@ -1102,7 +1137,8 @@ def _construct_scalar_coord(coord_spec):
             long_name=coord_spec.long_name,
             var_name=coord_spec.out_name,
             units=coord_spec.units,
-            attributes=None)
+            attributes=None,
+        )
 
     def _create_coord_from_spec(self, coord_spec, set_time_units, frequency):
         if coord_spec.units.startswith("days since "):
@@ -1122,12 +1158,12 @@ def _create_coord_from_spec(self, coord_spec, set_time_units, frequency):
     def _construct_array_coord(self, dim_spec, aux=False):
         if dim_spec.units.startswith("days since "):
             values = self._get_time_values(dim_spec)
-            unit = Unit(dim_spec.units, calendar='360_day')
+            unit = Unit(dim_spec.units, calendar="360_day")
         else:
            values = self._get_values(dim_spec)
             unit = Unit(dim_spec.units)
         # Set up attributes dictionary
-        coord_atts = {'stored_direction': dim_spec.stored_direction}
+        coord_atts = {"stored_direction": dim_spec.stored_direction}
         if aux:
             coord = iris.coords.AuxCoord(
                 values,
@@ -1156,7 +1192,8 @@ def _get_values(dim_spec):
             float(dim_spec.requested[0])
         except ValueError:
             return dim_spec.requested + [
-                f'Value{x}' for x in range(len(dim_spec.requested), 20)]
+                f"Value{x}" for x in range(len(dim_spec.requested), 20)
+            ]
         valid_min = dim_spec.valid_min
         if valid_min:
             valid_min = float(valid_min)
@@ -1167,14 +1204,12 @@ def _get_values(dim_spec):
             valid_max = float(valid_max)
         else:
             valid_max = 100.0
-        decreasing = dim_spec.stored_direction == 'decreasing'
-        endpoint = not dim_spec.standard_name == 'longitude'
+        decreasing = dim_spec.stored_direction == "decreasing"
+        endpoint = not dim_spec.standard_name == "longitude"
         if decreasing:
-            values = np.linspace(
-                valid_max, valid_min, 20, endpoint=endpoint)
+            values = np.linspace(valid_max, valid_min, 20, endpoint=endpoint)
         else:
-            values = np.linspace(
-                valid_min, valid_max, 20, endpoint=endpoint)
+            values = np.linspace(valid_min, valid_max, 20, endpoint=endpoint)
         values = np.array(values)
         if dim_spec.requested:
             requested = [float(val) for val in dim_spec.requested]
@@ -1183,10 +1218,12 @@ def _get_values(dim_spec):
                 values[j] = request
             if decreasing:
                 extra_values = np.linspace(
-                    len(requested), valid_min, 20 - len(requested))
+                    len(requested), valid_min, 20 - len(requested)
+                )
             else:
                 extra_values = np.linspace(
-                    len(requested), valid_max, 20 - len(requested))
+                    len(requested), valid_max, 20 - len(requested)
+                )
 
             for j in range(len(requested), 20):
                 values[j] = extra_values[j - len(requested)]
@@ -1196,20 +1233,20 @@
     @staticmethod
     def _get_time_values(dim_spec):
         frequency = dim_spec.frequency
-        if frequency == 'mon':
+        if frequency == "mon":
             delta = 30
-        elif frequency == 'day':
+        elif frequency == "day":
             delta = 1
-        elif frequency == 'yr':
+        elif frequency == "yr":
             delta = 360
-        elif frequency == 'dec':
+        elif frequency == "dec":
             delta = 3600
-        elif frequency.endswith('hr'):
-            if frequency == 'hr':
-                frequency = '1hr'
+        elif frequency.endswith("hr"):
+            if frequency == "hr":
+                frequency = "1hr"
             delta = float(frequency[:-2]) / 24
         else:
-            raise Exception('Frequency {} not supported'.format(frequency))
+            raise Exception("Frequency {} not supported".format(frequency))
         start = 0
         end = start + delta * 20
         return np.arange(start, end, step=delta)
@@ -1218,13 +1255,13 @@ def _get_time_values(dim_spec):
 
 def test_get_cmor_checker_invalid_project_fail():
     """Test ``_get_cmor_checker`` with invalid project."""
     with pytest.raises(KeyError):
-        _get_cmor_checker('INVALID_PROJECT', 'mip', 'short_name', 'frequency')
+        _get_cmor_checker("INVALID_PROJECT", "mip", "short_name", "frequency")
 
 
 def test_deprecate_automatic_fixes():
     """Test deprecation of automatic_fixes."""
     with pytest.warns(ESMValCoreDeprecationWarning):
-        CMORCheck('cube', 'var_info', 'frequency', automatic_fixes=True)
+        CMORCheck("cube", "var_info", "frequency", automatic_fixes=True)
 
 
 if __name__ == "__main__":
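
For reference, a minimal sketch (not part of the patch) of the internal checker API that the tests above exercise. It assumes the private `_get_cmor_checker`/`CMORCheck` interface as imported by `tests/unit/cmor/test_cmor_check.py`; the toy cube is deliberately incomplete, so the check is expected to report errors.

# Illustrative sketch only: drive the CMOR checker the way the tests do.
# _get_cmor_checker and CMORCheck are private ESMValCore names; recipes go
# through the public cmor_check_metadata/cmor_check_data helpers instead.
import numpy as np
from iris.cube import Cube

from esmvalcore.cmor.check import CMORCheckError, _get_cmor_checker

# Factory signature as used in test_hr_mip_cordex:
# (project, mip, short_name, frequency) -> callable(cube) -> CMORCheck
checker_factory = _get_cmor_checker("CORDEX", "3hr", "tas", "3hr")

cube = Cube(np.full((2,), 280.0), var_name="tas", units="K")  # toy cube
checker = checker_factory(cube)
try:
    checker.check_metadata()  # collects errors, then raises CMORCheckError
    checker.check_data()
except CMORCheckError as exc:
    print(exc)  # expected here: the toy cube has no time/lat/lon coordinates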
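
The `_get_time_values` helper in the last hunks maps a CMOR frequency string to a time step in days on a 360-day calendar. The same mapping as a standalone sketch; the `freq_to_delta_days` name is hypothetical, and it raises ValueError where the test helper raises a bare Exception.

# Hypothetical standalone version of the frequency-to-step mapping used by
# _get_time_values above; deltas are days on a 360_day calendar.
import numpy as np


def freq_to_delta_days(frequency: str) -> float:
    """Return the time step in days for a CMOR frequency string."""
    fixed = {"mon": 30, "day": 1, "yr": 360, "dec": 3600}
    if frequency in fixed:
        return fixed[frequency]
    if frequency.endswith("hr"):
        # "hr" means "1hr"; "3hr" -> 3 hours -> 0.125 days, etc.
        hours = float((frequency if frequency != "hr" else "1hr")[:-2])
        return hours / 24
    raise ValueError(f"Frequency {frequency} not supported")


# 20 monthly points starting at 0, like the cubes built in get_cube():
delta = freq_to_delta_days("mon")
points = np.arange(0, 20 * delta, step=delta)
assert points[1] - points[0] == 30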