From 3c1f6978ccaf05e3fb4e75ee56216676b7aa4f74 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 14 Jul 2023 15:34:54 +0200 Subject: [PATCH 01/13] Use 'cell_area' directly as weights for area_statistics --- esmvalcore/preprocessor/_area.py | 265 +++++++++++---------- esmvalcore/preprocessor/_shared.py | 33 +-- tests/unit/preprocessor/_area/test_area.py | 2 +- 3 files changed, 162 insertions(+), 138 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 621a5e47e1..d70a461b1a 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -8,7 +8,7 @@ import logging import warnings from pathlib import Path -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Iterable, Optional import fiona import iris @@ -16,7 +16,7 @@ import shapely import shapely.ops from dask import array as da -from iris.coords import AuxCoord +from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube, CubeList from iris.exceptions import CoordinateNotFoundError @@ -40,30 +40,35 @@ SHAPE_ID_KEYS: tuple[str, ...] = ('name', 'NAME', 'Name', 'id', 'ID') -def extract_region(cube, start_longitude, end_longitude, start_latitude, - end_latitude): +def extract_region( + cube: Cube, + start_longitude: float, + end_longitude: float, + start_latitude: float, + end_latitude: float, +) -> Cube: """Extract a region from a cube. Function that subsets a cube on a box (start_longitude, end_longitude, - start_latitude, end_latitude) + start_latitude, end_latitude). Parameters ---------- - cube: iris.cube.Cube - input data cube. - start_longitude: float + cube: + Input data cube. + start_longitude: Western boundary longitude. - end_longitude: float + end_longitude: Eastern boundary longitude. - start_latitude: float + start_latitude: Southern Boundary latitude. - end_latitude: float + end_latitude: Northern Boundary Latitude. Returns ------- - iris.cube.Cube - smaller cube. + Smaller cube. 
+ """ if abs(start_latitude) > 90.: raise ValueError(f"Invalid start_latitude: {start_latitude}") @@ -129,29 +134,27 @@ def _extract_irregular_region(cube, start_longitude, end_longitude, return cube -def zonal_statistics(cube, operator): +def zonal_statistics(cube: Cube, operator: str) -> Cube: """Compute zonal statistics. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. + cube: + Input cube. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- - iris.cube.Cube - Zonal statistics cube. + Zonal statistics cube. Raises ------ ValueError Error raised if computation on irregular grids is attempted. Zonal statistics not yet implemented for irregular grids. + """ if cube.coord('longitude').points.ndim < 2: operation = get_iris_analysis_operation(operator) @@ -162,29 +165,27 @@ def zonal_statistics(cube, operator): raise ValueError(msg) -def meridional_statistics(cube, operator): +def meridional_statistics(cube: Cube, operator: str) -> Cube: """Compute meridional statistics. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. + cube: + Input cube. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- - iris.cube.Cube - Meridional statistics cube. + Meridional statistics cube. Raises ------ ValueError Error raised if computation on irregular grids is attempted. Zonal statistics not yet implemented for irregular grids. 
+ """ if cube.coord('latitude').points.ndim < 2: operation = get_iris_analysis_operation(operator) @@ -212,125 +213,149 @@ def compute_area_weights(cube): return weights +def _add_calculated_cell_area(cube): + """Add calculated cell measure 'cell_area' to cube (in-place).""" + assert not cube.cell_measures('cell_area') + + logger.debug( + "Found no or multiple cell measures 'cell_area' in cube %s. Check " + "availability of supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell area") + + regular_grid = all([ + cube.coord('latitude').points.ndim == 1, + cube.coord('longitude').points.ndim == 1, + ]) + irregular_grid_with_grid_lat_lon = all([ + cube.coord('latitude').points.ndim == 2, + cube.coord('longitude').points.ndim == 2, + cube.coords('grid_latitude'), + cube.coords('grid_longitude'), + ]) + + # For regular grids, calculate grid cell areas with iris function + if regular_grid: + cube = guess_bounds(cube, ['latitude', 'longitude']) + logger.debug("Calculating grid cell areas for regular grid") + cell_areas = compute_area_weights(cube) + + # For irregular grids that provide grid_latitude and grid_longitude, use + # these to calculate grid cell areas + elif irregular_grid_with_grid_lat_lon: + cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) + cube_tmp = cube.copy() + cube_tmp.remove_coord('latitude') + cube_tmp.coord('grid_latitude').rename('latitude') + cube_tmp.remove_coord('longitude') + cube_tmp.coord('grid_longitude').rename('longitude') + logger.debug("Calculating grid cell areas for irregular grid") + cell_areas = compute_area_weights(cube_tmp) + + # For all other cases, grid cell areas cannot be calculated + else: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular or unstructured grid of cube %s", + cube.summary(shorten=True), + ) + raise iris.exceptions.CoordinateMultiDimError(cube.coord('latitude')) + + # Add new cell measure + 
cell_measure = CellMeasure( + cell_areas, standard_name='cell_area', units='m2', measure='area', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + @register_supplementaries( variables=['areacella', 'areacello'], required='prefer_at_least_one', ) -def area_statistics(cube, operator): - """Apply a statistical operator in the horizontal direction. +def area_statistics(cube: Cube, operator): + """Apply a statistical operator in the horizontal plane. - The average in the horizontal direction. We assume that the - horizontal directions are ['longitude', 'latutude']. + We assume that the horizontal directions are ['longitude', 'latitude']. - This function can be used to apply - several different operations in the horizontal plane: mean, standard - deviation, median variance, minimum and maximum. These options are - specified using the `operator` argument and the following key word - arguments: + This function can be used to apply several different operations in the + horizontal plane: mean, standard deviation, median variance, minimum and + maximum. The following options for `operator` are allowed: +------------+--------------------------------------------------+ - | `mean` | Area weighted mean. | + | `mean` | Area weighted mean | +------------+--------------------------------------------------+ | `median` | Median (not area weighted) | +------------+--------------------------------------------------+ | `std_dev` | Standard Deviation (not area weighted) | +------------+--------------------------------------------------+ - | `sum` | Area weighted sum. 
| + | `sum` | Area weighted sum | +------------+--------------------------------------------------+ | `variance` | Variance (not area weighted) | +------------+--------------------------------------------------+ - | `min`: | Minimum value | + | `min` | Minimum value | +------------+--------------------------------------------------+ | `max` | Maximum value | +------------+--------------------------------------------------+ - | `rms` | Area weighted root mean square. | + | `rms` | Area weighted root mean square | +------------+--------------------------------------------------+ + Note that for sums, the units of the resulting cube will be multiplied by + m:math:`^2`. + Parameters ---------- - cube: iris.cube.Cube - Input cube. The input cube should have a - :class:`iris.coords.CellMeasure` named ``'cell_area'``, unless it - has regular 1D latitude and longitude coordinates so the cell areas - can be computed using - :func:`iris.analysis.cartography.area_weights`. - operator: str - The operation, options: mean, median, min, max, std_dev, sum, - variance, rms. + cube: + Input cube. The input cube should have a + :class:`iris.coords.CellMeasure` named ``'cell_area'``, unless it has + regular 1D latitude and longitude coordinates so the cell areas can be + computed using :func:`iris.analysis.cartography.area_weights`. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- - iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ iris.exceptions.CoordinateMultiDimError - Exception for latitude axis with dim > 2. - ValueError - if input data cube has different shape than grid area weights + Cube has irregular or unstructured grid but supplementary variable + `cell_area` is not available. 
+ """ original_dtype = cube.dtype - grid_areas = None - try: - grid_areas = cube.cell_measure('cell_area').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.debug( - 'Cell measure "cell_area" not found in cube %s. ' - 'Check fx_file availability.', cube.summary(shorten=True)) - logger.debug('Attempting to calculate grid cell area...') - else: - grid_areas = da.broadcast_to(grid_areas, cube.shape) - - if grid_areas is None and cube.coord('latitude').points.ndim == 2: - coord_names = [coord.standard_name for coord in cube.coords()] - if 'grid_latitude' in coord_names and 'grid_longitude' in coord_names: - cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) - cube_tmp = cube.copy() - cube_tmp.remove_coord('latitude') - cube_tmp.coord('grid_latitude').rename('latitude') - cube_tmp.remove_coord('longitude') - cube_tmp.coord('grid_longitude').rename('longitude') - grid_areas = compute_area_weights(cube_tmp) - logger.debug('Calculated grid area shape: %s', grid_areas.shape) - else: - logger.error( - 'fx_file needed to calculate grid cell area for irregular ' - 'grids.') - raise iris.exceptions.CoordinateMultiDimError( - cube.coord('latitude')) - - coord_names = ['longitude', 'latitude'] - if grid_areas is None: - cube = guess_bounds(cube, coord_names) - grid_areas = compute_area_weights(cube) - logger.debug('Calculated grid area shape: %s', grid_areas.shape) - - if cube.shape != grid_areas.shape: - raise ValueError('Cube shape ({}) doesn`t match grid area shape ' - '({})'.format(cube.shape, grid_areas.shape)) - operation = get_iris_analysis_operation(operator) + coord_names = ['longitude', 'latitude'] - # TODO: implement weighted stdev, median, s var when available in iris. 
- # See iris issue: https://github.com/SciTools/iris/issues/3208 - + # Calculate (weighted) statistics if operator_accept_weights(operator): - result = cube.collapsed(coord_names, operation, weights=grid_areas) + # If necessary, try to calculate cell_area (this only works for regular + # grids and certain irregular grids) + if not cube.cell_measures('cell_area'): + _add_calculated_cell_area(cube) + result = cube.collapsed(coord_names, operation, weights='cell_area') else: # Many IRIS analysis functions do not accept weights arguments. + # TODO: implement weighted stdev, median, var when available in iris. + # See iris issue: https://github.com/SciTools/iris/issues/3208 result = cube.collapsed(coord_names, operation) + # Make sure to preserve dtype new_dtype = result.dtype if original_dtype != new_dtype: logger.debug( - "area_statistics changed dtype from " - "%s to %s, changing back", original_dtype, new_dtype) + "area_statistics changed dtype from %s to %s, changing back", + original_dtype, + new_dtype, + ) result.data = result.core_data().astype(original_dtype) + return result -def extract_named_regions(cube, regions): +def extract_named_regions(cube: Cube, regions: str | Iterable[str]) -> Cube: """Extract a specific named region. The region coordinate exist in certain CMIP datasets. @@ -338,15 +363,14 @@ def extract_named_regions(cube, regions): Parameters ---------- - cube: iris.cube.Cube - input cube. - regions: str, list + cube: + Input cube. + regions: A region or list of regions to extract. Returns ------- - iris.cube.Cube - collapsed cube. + Smaller cube. Raises ------ @@ -354,6 +378,7 @@ def extract_named_regions(cube, regions): regions is not list or tuple or set. ValueError region not included in cube. + """ # Make sure regions is a list of strings if isinstance(regions, str): @@ -617,12 +642,11 @@ def fix_coordinate_ordering(cube: Cube) -> Cube: Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. 
Returns ------- - iris.cube.Cube Cube with dimensions transposed to standard order """ @@ -702,9 +726,9 @@ def extract_shape( Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - shapefile: str or Path + shapefile: A shapefile defining the region(s) to extract. Also accepts the following strings to load special shapefiles: @@ -712,19 +736,19 @@ def extract_shape( 6 (https://doi.org/10.5281/zenodo.5176260). Should be used in combination with a :obj:`dict` for the argument `ids`, e.g., ``ids={'Acronym': ['GIC', 'WNA']}``. - method: str, optional + method: Select all points contained by the shape or select a single representative point. Choose either `'contains'` or `'representative'`. If `'contains'` is used, but not a single grid point is contained by the shape, a representative point will be selected. - crop: bool, optional + crop: In addition to masking, crop the resulting cube using :func:`~esmvalcore.preprocessor.extract_region`. Data on irregular grids will not be cropped. - decomposed: bool, optional + decomposed: If set to `True`, the output cube will have an additional dimension `shape_id` describing the requested regions. - ids: list or dict or None, optional + ids: Shapes to be read from the shapefile. Can be given as: * :obj:`list`: IDs are assigned from the attributes `name`, `NAME`, @@ -742,10 +766,9 @@ def extract_shape( Returns ------- - iris.cube.Cube Cube containing the extracted region. - See Also + See also -------- extract_region: Extract a region from a cube. diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 89b6d13b32..9601fe23ab 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -21,51 +21,52 @@ def guess_bounds(cube, coords): return cube -def get_iris_analysis_operation(operator): - """ - Determine the iris analysis operator from a string. 
+def get_iris_analysis_operation(operator: str) -> iris.analysis.Aggregator: + """Determine the iris analysis operator from a :obj:`str`. Map string to functional operator. Parameters ---------- - operator: str + operator: A named operator. Returns ------- - function: A function from iris.analysis + Object that can be used within :meth:`iris.cube.Cube.collapsed`, + :meth:`iris.cube.Cube.aggregated_by`, or + :meth:`iris.cube.Cube.rolling_window`. Raises ------ ValueError - operator not in allowed operators list. - allowed operators: mean, median, std_dev, sum, variance, min, max, rms + An invalid operator is specified. Allowed options: `mean`, `median`, + `std_dev`, `sum`, `variance`, `min`, `max`, `rms`. """ operators = [ - 'mean', 'median', 'std_dev', 'sum', 'variance', 'min', 'max', 'rms' + 'mean', 'median', 'std_dev', 'sum', 'variance', 'min', 'max', 'rms', ] operator = operator.lower() if operator not in operators: - raise ValueError("operator {} not recognised. " - "Accepted values are: {}." - "".format(operator, ', '.join(operators))) + raise ValueError( + f"operator '{operator}' not recognised. Accepted values are: " + f"{', '.join(operators)}." + ) operation = getattr(iris.analysis, operator.upper()) return operation -def operator_accept_weights(operator): - """ - Get if operator support weights. +def operator_accept_weights(operator: str) -> bool: + """Get if operator support weights. Parameters ---------- - operator: str + operator: A named operator. Returns ------- - bool: True if operator support weights, False otherwise + ``True`` if operator support weights, ``False`` otherwise. 
""" return operator.lower() in ('mean', 'sum', 'rms') diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index cabaadf396..e0592059e5 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -27,7 +27,7 @@ class Test(tests.Test): - """Test class for the :func:`esmvalcore.preprocessor._area_pp` module.""" + """Test class for the :func:`esmvalcore.preprocessor._area` module.""" def setUp(self): """Prepare tests.""" self.coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) From b0852f17d14e0e41abd87619be8294b0593d6991 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 14 Jul 2023 16:02:59 +0200 Subject: [PATCH 02/13] Small doc changes --- esmvalcore/preprocessor/_area.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index d70a461b1a..6ea3be00ce 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -241,8 +241,8 @@ def _add_calculated_cell_area(cube): logger.debug("Calculating grid cell areas for regular grid") cell_areas = compute_area_weights(cube) - # For irregular grids that provide grid_latitude and grid_longitude, use - # these to calculate grid cell areas + # For rotated pole grids, use grid_latitude and grid_longitude to calculate + # grid cell areas elif irregular_grid_with_grid_lat_lon: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) cube_tmp = cube.copy() @@ -768,7 +768,7 @@ def extract_shape( ------- Cube containing the extracted region. - See also + See Also -------- extract_region: Extract a region from a cube. 
From d49b4f67a229da3bd10d8883b472620a75abc347 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 14 Jul 2023 16:54:12 +0200 Subject: [PATCH 03/13] Added tests for area_statistics --- esmvalcore/preprocessor/_area.py | 32 +++--- tests/unit/preprocessor/_area/test_area.py | 107 ++++++++++++++++++--- 2 files changed, 115 insertions(+), 24 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 6ea3be00ce..05ee8033c5 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -18,7 +18,7 @@ from dask import array as da from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube, CubeList -from iris.exceptions import CoordinateNotFoundError +from iris.exceptions import CoordinateMultiDimError, CoordinateNotFoundError from ._shared import ( get_iris_analysis_operation, @@ -67,7 +67,8 @@ def extract_region( Returns ------- - Smaller cube. + iris.cube.Cube + Smaller cube. """ if abs(start_latitude) > 90.: @@ -147,7 +148,8 @@ def zonal_statistics(cube: Cube, operator: str) -> Cube: Returns ------- - Zonal statistics cube. + iris.cube.Cube + Zonal statistics cube. Raises ------ @@ -178,7 +180,8 @@ def meridional_statistics(cube: Cube, operator: str) -> Cube: Returns ------- - Meridional statistics cube. + iris.cube.Cube + Meridional statistics cube. 
Raises ------ @@ -227,8 +230,9 @@ def _add_calculated_cell_area(cube): regular_grid = all([ cube.coord('latitude').points.ndim == 1, cube.coord('longitude').points.ndim == 1, + cube.coord_dims('latitude') != cube.coord_dims('longitude'), ]) - irregular_grid_with_grid_lat_lon = all([ + rotated_pole_grid = all([ cube.coord('latitude').points.ndim == 2, cube.coord('longitude').points.ndim == 2, cube.coords('grid_latitude'), @@ -243,14 +247,14 @@ def _add_calculated_cell_area(cube): # For rotated pole grids, use grid_latitude and grid_longitude to calculate # grid cell areas - elif irregular_grid_with_grid_lat_lon: + elif rotated_pole_grid: cube = guess_bounds(cube, ['grid_latitude', 'grid_longitude']) cube_tmp = cube.copy() cube_tmp.remove_coord('latitude') cube_tmp.coord('grid_latitude').rename('latitude') cube_tmp.remove_coord('longitude') cube_tmp.coord('grid_longitude').rename('longitude') - logger.debug("Calculating grid cell areas for irregular grid") + logger.debug("Calculating grid cell areas for rotated pole grid") cell_areas = compute_area_weights(cube_tmp) # For all other cases, grid cell areas cannot be calculated @@ -260,7 +264,7 @@ def _add_calculated_cell_area(cube): "areas for irregular or unstructured grid of cube %s", cube.summary(shorten=True), ) - raise iris.exceptions.CoordinateMultiDimError(cube.coord('latitude')) + raise CoordinateMultiDimError(cube.coord('latitude')) # Add new cell measure cell_measure = CellMeasure( @@ -273,7 +277,7 @@ def _add_calculated_cell_area(cube): variables=['areacella', 'areacello'], required='prefer_at_least_one', ) -def area_statistics(cube: Cube, operator): +def area_statistics(cube: Cube, operator: str) -> Cube: """Apply a statistical operator in the horizontal plane. We assume that the horizontal directions are ['longitude', 'latitude']. @@ -316,7 +320,8 @@ def area_statistics(cube: Cube, operator): Returns ------- - Collapsed cube. + iris.cube.Cube + Collapsed cube. 
Raises ------ @@ -332,7 +337,7 @@ def area_statistics(cube: Cube, operator): # Calculate (weighted) statistics if operator_accept_weights(operator): # If necessary, try to calculate cell_area (this only works for regular - # grids and certain irregular grids) + # grids and certain irregular grids, and fails for others) if not cube.cell_measures('cell_area'): _add_calculated_cell_area(cube) result = cube.collapsed(coord_names, operation, weights='cell_area') @@ -370,7 +375,8 @@ def extract_named_regions(cube: Cube, regions: str | Iterable[str]) -> Cube: Returns ------- - Smaller cube. + iris.cube.Cube + Smaller cube. Raises ------ @@ -647,6 +653,7 @@ def fix_coordinate_ordering(cube: Cube) -> Cube: Returns ------- + iris.cube.Cube Cube with dimensions transposed to standard order """ @@ -766,6 +773,7 @@ def extract_shape( Returns ------- + iris.cube.Cube Cube containing the extracted region. See Also diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index e0592059e5..dc4c32adf2 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -8,6 +8,7 @@ import pytest from cf_units import Unit from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError from iris.fileformats.pp import EARTH_RADIUS from numpy.testing._private.utils import assert_raises from shapely.geometry import Polygon, mapping @@ -46,7 +47,11 @@ def setUp(self): coord_system=self.coord_sys, ) coords_spec = [(lats, 0), (lons, 1)] - self.grid = iris.cube.Cube(data, dim_coords_and_dims=coords_spec) + self.grid = iris.cube.Cube( + data, + dim_coords_and_dims=coords_spec, + units='kg m-2 s-1', + ) ndata = np.ones((6, 6)) nlons = iris.coords.DimCoord( @@ -63,55 +68,68 @@ def setUp(self): coord_system=self.coord_sys, ) coords_spec = [(nlats, 0), (nlons, 1)] - self.negative_grid = iris.cube.Cube(ndata, - dim_coords_and_dims=coords_spec) + self.negative_grid = iris.cube.Cube( + ndata, + 
dim_coords_and_dims=coords_spec, + units='kg m-2 s-1', + ) + + def _add_cell_measure_to_grid(self): + """Add cell_area to self.grid.""" + cube = guess_bounds(self.grid, ['longitude', 'latitude']) + grid_areas = iris.analysis.cartography.area_weights(cube) + measure = iris.coords.CellMeasure( + grid_areas, + standard_name='cell_area', + units='m2', + measure='area') + self.grid.add_cell_measure(measure, range(0, measure.ndim)) def test_area_statistics_mean(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'mean') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_cell_measure_mean(self): """Test for area average of a 2D field. - The area measure is pre-loaded in the cube + The area measure is pre-loaded in the cube. """ - cube = guess_bounds(self.grid, ['longitude', 'latitude']) - grid_areas = iris.analysis.cartography.area_weights(cube) - measure = iris.coords.CellMeasure( - grid_areas, - standard_name='cell_area', - units='m2', - measure='area') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) + self._add_cell_measure_to_grid() result = area_statistics(self.grid, 'mean') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_max(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'max') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_median(self): """Test for area average of a 2D field.""" result = 
area_statistics(self.grid, 'median') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_std_dev(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'std_dev') expected = np.array([0.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_sum(self): """Test for sum of a 2D field.""" @@ -119,24 +137,40 @@ def test_area_statistics_sum(self): grid_areas = iris.analysis.cartography.area_weights(self.grid) expected = np.sum(grid_areas).astype(np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg s-1') + + def test_area_statistics_cell_measure_sum(self): + """Test for area sum of a 2D field. + + The area measure is pre-loaded in the cube. + """ + self._add_cell_measure_to_grid() + grid_areas = iris.analysis.cartography.area_weights(self.grid) + result = area_statistics(self.grid, 'sum') + expected = np.sum(grid_areas).astype(np.float32) + self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg s-1') def test_area_statistics_variance(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'variance') expected = np.array([0.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg2 m-4 s-2') def test_area_statistics_neg_lon(self): """Test for area average of a 2D field.""" result = area_statistics(self.negative_grid, 'mean') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_rms(self): """Test for area rms of a 2D field.""" result = area_statistics(self.grid, 'rms') expected = np.array([1.], dtype=np.float32) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg 
m-2 s-1') def test_extract_region(self): """Test for extracting a region from a 2D field.""" @@ -501,6 +535,55 @@ def test_area_statistics_rotated(case): np.testing.assert_array_equal(cube.data, expected) +def create_unstructured_grid_cube(): + """Create test cube with unstructured grid.""" + lat = iris.coords.AuxCoord( + [0, 1, 2], var_name='lat', standard_name='latitude', units='degrees', + ) + lon = iris.coords.AuxCoord( + [0, 1, 2], var_name='lon', standard_name='longitude', units='degrees', + ) + cube = iris.cube.Cube( + [0, 10, 20], + var_name='tas', + units='K', + aux_coords_and_dims=[(lat, 0), (lon, 0)], + ) + return cube + + +def test_area_statistics_max_irregular_grid(): + """Test ``area_statistics``.""" + values = np.arange(12).reshape(2, 2, 3) + cube = create_irregular_grid_cube(values, values[0, ...], values[0, ...]) + result = area_statistics(cube, 'max') + assert isinstance(result, Cube) + np.testing.assert_array_equal(result.data, [5, 11]) + + +def test_area_statistics_max_unstructured_grid(): + """Test ``area_statistics``.""" + cube = create_unstructured_grid_cube() + result = area_statistics(cube, 'max') + assert isinstance(result, Cube) + np.testing.assert_array_equal(result.data, 20) + + +def test_area_statistics_sum_irregular_grid_fail(): + """Test ``area_statistics``.""" + values = np.arange(12).reshape(2, 2, 3) + cube = create_irregular_grid_cube(values, values[0, ...], values[0, ...]) + with pytest.raises(CoordinateMultiDimError): + area_statistics(cube, 'sum') + + +def test_area_statistics_sum_unstructured_grid_fail(): + """Test ``area_statistics``.""" + cube = create_unstructured_grid_cube() + with pytest.raises(CoordinateMultiDimError): + area_statistics(cube, 'sum') + + @pytest.fixture def make_testcube(): """Create a test cube on a Cartesian grid.""" From 941e94cf590986ab7be4a0d4b6b87a59f535e360 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Fri, 14 Jul 2023 17:48:35 +0200 Subject: [PATCH 04/13] Expanded test to check if differing 
shape is ok --- tests/unit/preprocessor/_area/test_area.py | 41 +++++++++++++--------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/tests/unit/preprocessor/_area/test_area.py b/tests/unit/preprocessor/_area/test_area.py index dc4c32adf2..c002b8db80 100644 --- a/tests/unit/preprocessor/_area/test_area.py +++ b/tests/unit/preprocessor/_area/test_area.py @@ -32,7 +32,12 @@ class Test(tests.Test): def setUp(self): """Prepare tests.""" self.coord_sys = iris.coord_systems.GeogCS(EARTH_RADIUS) - data = np.ones((5, 5), dtype=np.float32) + data = np.ones((2, 5, 5), dtype=np.float32) + times = iris.coords.DimCoord( + [0, 1], + standard_name='time', + units='days since 2000-01-01', + ) lons = iris.coords.DimCoord( [i + .5 for i in range(5)], standard_name='longitude', @@ -46,7 +51,7 @@ def setUp(self): units='degrees_north', coord_system=self.coord_sys, ) - coords_spec = [(lats, 0), (lons, 1)] + coords_spec = [(times, 0), (lats, 1), (lons, 2)] self.grid = iris.cube.Cube( data, dim_coords_and_dims=coords_spec, @@ -77,18 +82,18 @@ def setUp(self): def _add_cell_measure_to_grid(self): """Add cell_area to self.grid.""" cube = guess_bounds(self.grid, ['longitude', 'latitude']) - grid_areas = iris.analysis.cartography.area_weights(cube) + grid_areas = iris.analysis.cartography.area_weights(cube)[0] measure = iris.coords.CellMeasure( grid_areas, standard_name='cell_area', units='m2', measure='area') - self.grid.add_cell_measure(measure, range(0, measure.ndim)) + self.grid.add_cell_measure(measure, (1, 2)) def test_area_statistics_mean(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'mean') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') @@ -99,35 +104,35 @@ def test_area_statistics_cell_measure_mean(self): """ self._add_cell_measure_to_grid() result = area_statistics(self.grid, 'mean') - 
expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_min(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'min') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_max(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'max') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_median(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'median') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') def test_area_statistics_std_dev(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'std_dev') - expected = np.array([0.], dtype=np.float32) + expected = np.ma.array([0., 0.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') @@ -135,7 +140,8 @@ def test_area_statistics_sum(self): """Test for sum of a 2D field.""" result = area_statistics(self.grid, 'sum') grid_areas = iris.analysis.cartography.area_weights(self.grid) - expected = np.sum(grid_areas).astype(np.float32) + grid_sum = np.sum(grid_areas[0]) + expected = np.array([grid_sum, grid_sum]).astype(np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg s-1') @@ -147,14 +153,15 @@ def test_area_statistics_cell_measure_sum(self): self._add_cell_measure_to_grid() 
grid_areas = iris.analysis.cartography.area_weights(self.grid) result = area_statistics(self.grid, 'sum') - expected = np.sum(grid_areas).astype(np.float32) + grid_sum = np.sum(grid_areas[0]) + expected = np.array([grid_sum, grid_sum]).astype(np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg s-1') def test_area_statistics_variance(self): """Test for area average of a 2D field.""" result = area_statistics(self.grid, 'variance') - expected = np.array([0.], dtype=np.float32) + expected = np.ma.array([0., 0.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg2 m-4 s-2') @@ -168,7 +175,7 @@ def test_area_statistics_neg_lon(self): def test_area_statistics_rms(self): """Test for area rms of a 2D field.""" result = area_statistics(self.grid, 'rms') - expected = np.array([1.], dtype=np.float32) + expected = np.ma.array([1., 1.], dtype=np.float32) self.assert_array_equal(result.data, expected) self.assertEqual(result.units, 'kg m-2 s-1') @@ -176,7 +183,7 @@ def test_extract_region(self): """Test for extracting a region from a 2D field.""" result = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) # expected outcome - expected = np.ones((2, 2)) + expected = np.ones((2, 2, 2)) self.assert_array_equal(result.data, expected) def test_extract_region_mean(self): @@ -192,10 +199,10 @@ def test_extract_region_mean(self): self.grid.add_cell_measure(measure, range(0, measure.ndim)) region = extract_region(self.grid, 1.5, 2.5, 1.5, 2.5) # expected outcome - expected = np.ones((2, 2)) + expected = np.ones((2, 2, 2)) self.assert_array_equal(region.data, expected) result = area_statistics(region, 'mean') - expected_mean = np.array([1.]) + expected_mean = np.ma.array([1., 1.]) self.assert_array_equal(result.data, expected_mean) def test_extract_region_neg_lon(self): From 42676e1fbe3c4942791c08df2cf0b670d8a98af7 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 12:13:16 +0200 
Subject: [PATCH 05/13] Smarter weights for volume_statistics --- esmvalcore/preprocessor/_area.py | 10 +- esmvalcore/preprocessor/_volume.py | 122 +++++++++++------- .../unit/preprocessor/_volume/test_volume.py | 73 ++++++++++- 3 files changed, 147 insertions(+), 58 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 05ee8033c5..5e2040dcb9 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -216,13 +216,13 @@ def compute_area_weights(cube): return weights -def _add_calculated_cell_area(cube): - """Add calculated cell measure 'cell_area' to cube (in-place).""" +def _try_adding_calculated_cell_area(cube: Cube) -> None: + """Try to add calculated cell measure 'cell_area' to cube (in-place).""" assert not cube.cell_measures('cell_area') logger.debug( - "Found no or multiple cell measures 'cell_area' in cube %s. Check " - "availability of supplementary variables", + "Found no cell measure 'cell_area' in cube %s. Check availability of " + "supplementary variables", cube.summary(shorten=True), ) logger.debug("Attempting to calculate grid cell area") @@ -339,7 +339,7 @@ def area_statistics(cube: Cube, operator: str) -> Cube: # If necessary, try to calculate cell_area (this only works for regular # grids and certain irregular grids, and fails for others) if not cube.cell_measures('cell_area'): - _add_calculated_cell_area(cube) + _try_adding_calculated_cell_area(cube) result = cube.collapsed(coord_names, operation, weights='cell_area') else: # Many IRIS analysis functions do not accept weights arguments. 
diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 52b9393235..7e45a9b42f 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -8,7 +8,11 @@ import dask.array as da import iris import numpy as np +from iris.coords import CellMeasure +from iris.cube import Cube +from iris.exceptions import CoordinateMultiDimError +from ._area import compute_area_weights from ._shared import get_iris_analysis_operation, operator_accept_weights from ._supplementary_vars import register_supplementaries @@ -48,6 +52,7 @@ def extract_volume( 'left_closed' or 'right_closed'. nearest_value: bool extracts considering the nearest value of z-coord to z_min and z_max. + Returns ------- iris.cube.Cube @@ -87,10 +92,15 @@ def extract_volume( return cube.extract(z_constraint) -def calculate_volume(cube): +def calculate_volume(cube: Cube) -> da.core.Array: """Calculate volume from a cube. - This function is used when the volume ancillary variables can't be found. + This function is used when the 'ocean_volume' cell measure can't be found. + + Note + ---- + This only works if the grid cell areas can be calculated (i.e., latitude + and longitude are 1D) and if the depth coordinate is 1D or 4D. Parameters ---------- @@ -99,89 +109,103 @@ def calculate_volume(cube): Returns ------- - float - grid volume. + dask.array.core.Array + Grid volumes. + """ - # #### - # Load depth field and figure out which dim is which. 
+ # Load depth field and figure out which dim is which depth = cube.coord(axis='z') - z_dim = cube.coord_dims(cube.coord(axis='z'))[0] + z_dim = cube.coord_dims(depth)[0] - # #### - # Load z direction thickness + # Calculate Z-direction thickness thickness = depth.bounds[..., 1] - depth.bounds[..., 0] - # #### - # Calculate grid volume: - area = da.array(iris.analysis.cartography.area_weights(cube)) + # Try to calculate grid cell area + try: + area = da.array(compute_area_weights(cube)) + except CoordinateMultiDimError: + logger.error( + "Supplementary variables are needed to calculate grid cell " + "areas for irregular grid of cube %s", + cube.summary(shorten=True), + ) + raise + + # Try to calculate grid cell volume as area * thickness if thickness.ndim == 1 and z_dim == 1: grid_volume = area * thickness[None, :, None, None] - if thickness.ndim == 4 and z_dim == 1: + elif thickness.ndim == 4 and z_dim == 1: grid_volume = area * thickness[:, :] + else: + raise ValueError( + f"Supplementary variables are needed to calculate grid cell " + f"volumes for cubes with {thickness.ndim:d}D depth coordinate, " + f"got cube {cube.summary(shorten=True)}" + ) return grid_volume +def _try_adding_calculated_ocean_volume(cube: Cube) -> None: + """Try to add calculated cell measure 'ocean_volume' to cube (in-place).""" + logger.debug( + "Found no cell measure 'ocean_volume' in cube %s. Check availability " + "of supplementary variables", + cube.summary(shorten=True), + ) + logger.debug("Attempting to calculate grid cell volume") + + grid_volume = calculate_volume(cube) + + cell_measure = CellMeasure( + grid_volume, + standard_name='ocean_volume', + units='m3', + measure='volume', + ) + cube.add_cell_measure(cell_measure, np.arange(cube.ndim)) + + @register_supplementaries( variables=['volcello'], required='prefer_at_least_one', ) -def volume_statistics(cube, operator): +def volume_statistics(cube: Cube, operator: str) -> Cube: """Apply a statistical operation over a volume. 
The volume average is weighted according to the cell volume. Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. The input cube should have a - :class:`iris.coords.CellMeasure` with standard name ``'ocean_volume'``, - unless it has regular 1D latitude and longitude coordinates so the cell - volumes can be computed by using - :func:`iris.analysis.cartography.area_weights` to compute the cell - areas and multiplying those by the cell thickness, computed from the - bounds of the vertical coordinate. - operator: str - The operation to apply to the cube, options are: 'mean'. + :class:`iris.coords.CellMeasure` named ``'ocean_volume'``, unless it + has regular 1D latitude and longitude coordinates so the cell volumes + can be computed by using :func:`iris.analysis.cartography.area_weights` + to compute the cell areas and multiplying those by the cell thickness, + computed from the bounds of the vertical coordinate. + operator: + The operation. Allowed options are: `mean`. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. - Raises - ------ - ValueError - if input cube shape differs from grid volume cube shape. """ # TODO: Test sigma coordinates. # TODO: Add other operations. - if operator != 'mean': - raise ValueError(f'Volume operator {operator} not recognised.') - - try: - grid_volume = cube.cell_measure('ocean_volume').core_data() - except iris.exceptions.CellMeasureNotFoundError: - logger.debug('Cell measure "ocean_volume" not found in cube. 
' - 'Check fx_file availability.') - logger.debug('Attempting to calculate grid cell volume...') - grid_volume = calculate_volume(cube) - else: - grid_volume = da.broadcast_to(grid_volume, cube.shape) + raise ValueError(f"Volume operator {operator} not recognised.") - if cube.data.shape != grid_volume.shape: - raise ValueError('Cube shape ({}) doesn`t match grid volume shape ' - f'({cube.shape, grid_volume.shape})') + if not cube.cell_measures('ocean_volume'): + _try_adding_calculated_ocean_volume(cube) - masked_volume = da.ma.masked_where(da.ma.getmaskarray(cube.lazy_data()), - grid_volume) result = cube.collapsed( - [cube.coord(axis='Z'), - cube.coord(axis='Y'), - cube.coord(axis='X')], + [cube.coord(axis='Z'), cube.coord(axis='Y'), cube.coord(axis='X')], iris.analysis.MEAN, - weights=masked_volume) + weights='ocean_volume', + ) return result diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 45f4b286fd..474a03e8d3 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -3,8 +3,10 @@ import unittest import iris +import iris.fileformats import numpy as np from cf_units import Unit +from iris.exceptions import CoordinateMultiDimError import tests from esmvalcore.preprocessor._volume import ( @@ -53,6 +55,16 @@ def setUp(self): [25., 250.]], units='m', attributes={'positive': 'down'}) + zcoord_4d = iris.coords.AuxCoord( + np.broadcast_to([[[[0.5]], [[5.]], [[50.]]]], (2, 3, 2, 2)), + long_name='zcoord', + bounds=np.broadcast_to( + [[[[[0., 2.5]]], [[[2.5, 25.]]], [[[25., 250.]]]]], + (2, 3, 2, 2, 2), + ), + units='m', + attributes={'positive': 'down'}, + ) lons2 = iris.coords.DimCoord([1.5, 2.5], standard_name='longitude', bounds=[[1., 2.], [2., 3.]], @@ -68,16 +80,31 @@ def setUp(self): self.grid_3d = iris.cube.Cube(data1, dim_coords_and_dims=coords_spec3) coords_spec4 = [(time, 0), (zcoord, 1), (lats2, 2), (lons2, 3)] - self.grid_4d = 
iris.cube.Cube(data2, dim_coords_and_dims=coords_spec4) + self.grid_4d = iris.cube.Cube( + data2, + dim_coords_and_dims=coords_spec4, + units='kg m-3', + ) coords_spec5 = [(time2, 0), (zcoord, 1), (lats2, 2), (lons2, 3)] - self.grid_4d_2 = iris.cube.Cube(data3, - dim_coords_and_dims=coords_spec5) + self.grid_4d_2 = iris.cube.Cube( + data3, + dim_coords_and_dims=coords_spec5, + units='kg m-3', + ) + + self.grid_4d_z = iris.cube.Cube( + data2, + dim_coords_and_dims=[(time, 0), (lats2, 2), (lons2, 3)], + aux_coords_and_dims=[(zcoord_4d, (0, 1, 2, 3))], + units='kg m-3', + ) # allow iris to figure out the axis='z' coordinate iris.util.guess_coord_axis(self.grid_3d.coord('zcoord')) iris.util.guess_coord_axis(self.grid_4d.coord('zcoord')) iris.util.guess_coord_axis(self.grid_4d_2.coord('zcoord')) + iris.util.guess_coord_axis(self.grid_4d_z.coord('zcoord')) def test_axis_statistics_mean(self): """Test axis statistics with operator mean.""" @@ -260,12 +287,14 @@ def test_extract_volume_mean(self): result_mean = volume_statistics(result, 'mean') expected_mean = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result_mean.data, expected_mean) + self.assertEqual(result_mean.units, 'kg m-3') def test_volume_statistics(self): """Test to take the volume weighted average of a (2,3,2,2) cube.""" result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_cell_measure(self): """Test to take the volume weighted average of a (2,3,2,2) cube. @@ -281,6 +310,7 @@ def test_volume_statistics_cell_measure(self): result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_long(self): """Test to take the volume weighted average of a (4,3,2,2) cube. 
@@ -291,6 +321,7 @@ def test_volume_statistics_long(self): result = volume_statistics(self.grid_4d_2, 'mean') expected = np.ma.array([1., 1., 1., 1.], mask=False) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_masked_level(self): """Test to take the volume weighted average of a (2,3,2,2) cube where @@ -307,6 +338,7 @@ def test_volume_statistics_masked_timestep(self): result = volume_statistics(self.grid_4d, 'mean') expected = np.ma.array([1., 1], mask=[True, False]) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_volume_statistics_weights(self): """Test to take the volume weighted average of a (2,3,2,2) cube. @@ -324,13 +356,46 @@ def test_volume_statistics_weights(self): expected = np.ma.array([8.333333333333334, 19.144144144144143], mask=[False, False]) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') - def test_volume_statistics_wrong_operator(self): + def test_volume_statistics_wrong_operator_fail(self): with self.assertRaises(ValueError) as err: volume_statistics(self.grid_4d, 'wrong') self.assertEqual('Volume operator wrong not recognised.', str(err.exception)) + def test_volume_statistics_2d_lat_fail(self): + # Create dummy 2D latitude from depth + new_lat_coord = self.grid_4d_z.coord('zcoord')[0, 0, :, :] + new_lat_coord.rename('latitude') + self.grid_4d_z.remove_coord('latitude') + self.grid_4d_z.add_aux_coord(new_lat_coord, (2, 3)) + with self.assertRaises(CoordinateMultiDimError): + volume_statistics(self.grid_4d_z, 'mean') + + def test_volume_statistics_4d_depth_fail(self): + # Fails because depth coord dims are (0, ...), but must be (1, ...) 
+ with self.assertRaises(ValueError) as err: + volume_statistics(self.grid_4d_z, 'mean') + self.assertIn( + "Supplementary variables are needed to calculate grid cell " + "volumes for cubes with 4D depth coordinate, got cube ", + str(err.exception), + ) + + def test_volume_statistics_2d_depth_fail(self): + # Create new 2D depth coord + new_z_coord = self.grid_4d_z.coord('zcoord')[0, :, :, 0] + self.grid_4d_z.remove_coord('zcoord') + self.grid_4d_z.add_aux_coord(new_z_coord, (1, 2)) + with self.assertRaises(ValueError) as err: + volume_statistics(self.grid_4d_z, 'mean') + self.assertIn( + "Supplementary variables are needed to calculate grid cell " + "volumes for cubes with 2D depth coordinate, got cube ", + str(err.exception), + ) + def test_depth_integration_1d(self): """Test to take the depth integration of a 3 layer cube.""" result = depth_integration(self.grid_3d[:, 0, 0]) From cacaada56ea0c137460597f2d5eaaaedb7e8556e Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 12:19:34 +0200 Subject: [PATCH 06/13] Optimized doc --- esmvalcore/preprocessor/_shared.py | 10 ++++++---- esmvalcore/preprocessor/_volume.py | 3 ++- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/esmvalcore/preprocessor/_shared.py b/esmvalcore/preprocessor/_shared.py index 9601fe23ab..8b3c273c18 100644 --- a/esmvalcore/preprocessor/_shared.py +++ b/esmvalcore/preprocessor/_shared.py @@ -33,9 +33,10 @@ def get_iris_analysis_operation(operator: str) -> iris.analysis.Aggregator: Returns ------- - Object that can be used within :meth:`iris.cube.Cube.collapsed`, - :meth:`iris.cube.Cube.aggregated_by`, or - :meth:`iris.cube.Cube.rolling_window`. + iris.analysis.Aggregator + Object that can be used within :meth:`iris.cube.Cube.collapsed`, + :meth:`iris.cube.Cube.aggregated_by`, or + :meth:`iris.cube.Cube.rolling_window`. 
Raises ------ @@ -66,7 +67,8 @@ def operator_accept_weights(operator: str) -> bool: Returns ------- - ``True`` if operator support weights, ``False`` otherwise. + bool + ``True`` if operator support weights, ``False`` otherwise. """ return operator.lower() in ('mean', 'sum', 'rms') diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 7e45a9b42f..626291796a 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -100,7 +100,8 @@ def calculate_volume(cube: Cube) -> da.core.Array: Note ---- This only works if the grid cell areas can be calculated (i.e., latitude - and longitude are 1D) and if the depth coordinate is 1D or 4D. + and longitude are 1D) and if the depth coordinate is 1D or 4D with first + dimension 1. Parameters ---------- From f54e23ceb2df56f50b99e46592264118d1a2b5f2 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 12:51:05 +0200 Subject: [PATCH 07/13] Smarter weights for depth integration --- esmvalcore/preprocessor/_volume.py | 79 ++++++++++--------- .../unit/preprocessor/_volume/test_volume.py | 11 ++- 2 files changed, 52 insertions(+), 38 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 626291796a..44e7c9e65e 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -8,7 +8,7 @@ import dask.array as da import iris import numpy as np -from iris.coords import CellMeasure +from iris.coords import AuxCoord, CellMeasure from iris.cube import Cube from iris.exceptions import CoordinateMultiDimError @@ -211,76 +211,83 @@ def volume_statistics(cube: Cube, operator: str) -> Cube: return result -def axis_statistics(cube, axis, operator): +def axis_statistics(cube: Cube, axis: str, operator: str) -> Cube: """Perform statistics along a given axis. - Operates over an axis direction. If weights are required, - they are computed using the coordinate bounds. 
+ Operates over an axis direction. If weights are required, they are computed + using the coordinate bounds. Arguments --------- - cube: iris.cube.Cube + cube: Input cube. - axis: str - Direction over where to apply the operator. Possible values - are 'x', 'y', 'z', 't'. - operator: str - Statistics to perform. Available operators are: - 'mean', 'median', 'std_dev', 'sum', 'variance', - 'min', 'max', 'rms'. + axis: + Direction over where to apply the operator. Possible values are `x`, + `y`, `z`, `t`. + operator: + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. + """ + operation = get_iris_analysis_operation(operator) + + # Check if a coordinate for the desired axis exists try: coord = cube.coord(axis=axis) except iris.exceptions.CoordinateNotFoundError as err: - raise ValueError(f'Axis {axis} not found in cube ' - f'{cube.summary(shorten=True)}') from err + raise ValueError( + f"Axis {axis} not found in cube {cube.summary(shorten=True)}" + ) from err + + # Multidimensional coordinates are currently not supported coord_dims = cube.coord_dims(coord) if len(coord_dims) > 1: - raise NotImplementedError('axis_statistics not implemented for ' - 'multidimensional coordinates.') - operation = get_iris_analysis_operation(operator) + raise NotImplementedError( + "axis_statistics not implemented for multidimensional " + "coordinates." 
+ ) + + # For weighted operations, create a dummy weights coordinate using the + # bounds of the original coordinate (this handles units properly, e.g., for + # sums) if operator_accept_weights(operator): - coord_dim = coord_dims[0] - expand = list(range(cube.ndim)) - expand.remove(coord_dim) - bounds = coord.core_bounds() - weights = np.abs(bounds[..., 1] - bounds[..., 0]) - weights = np.expand_dims(weights, expand) - weights = da.broadcast_to(weights, cube.shape) - result = cube.collapsed(coord, operation, weights=weights) + weights_coord = AuxCoord( + np.abs(coord.core_bounds()[..., 1] - coord.core_bounds()[..., 0]), + long_name=f'{axis}_axis_statistics_weights', + units=coord.units, + ) + cube.add_aux_coord(weights_coord, coord_dims) + result = cube.collapsed(coord, operation, weights=weights_coord) else: result = cube.collapsed(coord, operation) return result -def depth_integration(cube): +def depth_integration(cube: Cube) -> Cube: """Determine the total sum over the vertical component. - Requires a 3D cube. The z-coordinate - integration is calculated by taking the sum in the z direction of the - cell contents multiplied by the cell thickness. + Requires a 3D cube. The z-coordinate integration is calculated by taking + the sum in the z direction of the cell contents multiplied by the cell + thickness. Arguments --------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. """ result = axis_statistics(cube, axis='z', operator='sum') result.rename('Depth_integrated_' + str(cube.name())) - # result.units = Unit('m') * result.units # This doesn't work: - # TODO: Change units on cube to reflect 2D concentration (not 3D) - # Waiting for news from iris community. 
return result diff --git a/tests/unit/preprocessor/_volume/test_volume.py b/tests/unit/preprocessor/_volume/test_volume.py index 474a03e8d3..94c37614d0 100644 --- a/tests/unit/preprocessor/_volume/test_volume.py +++ b/tests/unit/preprocessor/_volume/test_volume.py @@ -115,6 +115,7 @@ def test_axis_statistics_mean(self): weights = (bounds[:, 1] - bounds[:, 0]) expected = np.average(data, axis=1, weights=weights) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_median(self): """Test axis statistics in with operator median.""" @@ -123,6 +124,7 @@ def test_axis_statistics_median(self): result = axis_statistics(self.grid_4d, 'z', 'median') expected = np.median(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_min(self): """Test axis statistics with operator min.""" @@ -131,6 +133,7 @@ def test_axis_statistics_min(self): result = axis_statistics(self.grid_4d, 'z', 'min') expected = np.min(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_max(self): """Test axis statistics with operator max.""" @@ -139,6 +142,7 @@ def test_axis_statistics_max(self): result = axis_statistics(self.grid_4d, 'z', 'max') expected = np.max(data, axis=1) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_rms(self): """Test axis statistics with operator rms.""" @@ -151,20 +155,23 @@ def test_axis_statistics_std(self): result = axis_statistics(self.grid_4d, 'z', 'std_dev') expected = np.ma.zeros((2, 2, 2)) self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-3') def test_axis_statistics_variance(self): """Test axis statistics with operator variance.""" result = axis_statistics(self.grid_4d, 'z', 'variance') expected = np.ma.zeros((2, 2, 2)) self.assert_array_equal(result.data, 
expected) + self.assertEqual(result.units, 'kg2 m-6') def test_axis_statistics_sum(self): """Test axis statistics in multiple operators.""" result = axis_statistics(self.grid_4d, 'z', 'sum') expected = np.ma.ones((2, 2, 2)) * 250 self.assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2') - def test_wrong_axis_statistics(self): + def test_wrong_axis_statistics_fail(self): """Test raises error when axis is not found in cube.""" with self.assertRaises(ValueError) as err: axis_statistics(self.grid_3d, 't', 'mean') @@ -172,7 +179,7 @@ def test_wrong_axis_statistics(self): f'Axis t not found in cube {self.grid_3d.summary(shorten=True)}', str(err.exception)) - def test_multidimensional_axis_statistics(self): + def test_multidimensional_axis_statistics_fail(self): i_coord = iris.coords.DimCoord( [0, 1], long_name='cell index along first dimension', From 2c9e9180522c175332f4118abc7eadd08d3bc847 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 13:20:30 +0200 Subject: [PATCH 08/13] Added type hints for _time preprocessor functions --- esmvalcore/preprocessor/_time.py | 391 ++++++++++++++++--------------- 1 file changed, 201 insertions(+), 190 deletions(-) diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 5de60f35ac..2d543e8439 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -3,20 +3,22 @@ Allows for selecting data subsets using certain time bounds; constructing seasonal and area averages. 
""" +from __future__ import annotations + import copy import datetime import logging -from typing import Union +from typing import Iterable, Optional from warnings import filterwarnings import dask.array as da import iris import iris.coord_categorisation -import iris.cube import iris.exceptions import iris.util import isodate import numpy as np +from iris.cube import Cube, CubeList from iris.time import PartialDateTime from esmvalcore.cmor.check import _get_next_month, _get_time_bounds @@ -44,8 +46,15 @@ ) -def extract_time(cube, start_year, start_month, start_day, end_year, end_month, - end_day): +def extract_time( + cube: Cube, + start_year: int, + start_month: int, + start_day: int, + end_year: int, + end_month: int, + end_day: int, +) -> Cube: """Extract a time range from a cube. Given a time range passed in as a series of years, months and days, it @@ -54,20 +63,20 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month, Parameters ---------- - cube: iris.cube.Cube - input cube. - start_year: int - start year - start_month: int - start month - start_day: int - start day - end_year: int - end year - end_month: int - end month - end_day: int - end day + cube: + Input cube. + start_year: + Start year. + start_month: + Start month. + start_day: + Start day. + end_year: + End year. + end_month: + End month. + end_day: + End day. Returns ------- @@ -77,7 +86,8 @@ def extract_time(cube, start_year, start_month, start_day, end_year, end_month, Raises ------ ValueError - if time ranges are outside the cube time limits + Time ranges are outside the cube time limits. 
+ """ t_1 = PartialDateTime(year=int(start_year), month=int(start_month), @@ -136,10 +146,7 @@ def _duration_to_date(duration, reference, sign): return date -def _select_timeslice( - cube: iris.cube.Cube, - select: np.ndarray, -) -> Union[iris.cube.Cube, None]: +def _select_timeslice(cube: Cube, select: np.ndarray) -> Cube | None: """Slice a cube along its time axis.""" if select.any(): coord = cube.coord('time') @@ -157,10 +164,10 @@ def _select_timeslice( def _extract_datetime( - cube: iris.cube.Cube, + cube: Cube, start_datetime: PartialDateTime, end_datetime: PartialDateTime, -) -> iris.cube.Cube: +) -> Cube: """Extract a time range from a cube. Given a time range passed in as a datetime.datetime object, it @@ -169,12 +176,12 @@ def _extract_datetime( Parameters ---------- - cube: iris.cube.Cube - input cube. - start_datetime: PartialDateTime - start datetime - end_datetime: PartialDateTime - end datetime + cube: + Input cube. + start_datetime: + Start datetime + end_datetime: + End datetime Returns ------- @@ -221,14 +228,14 @@ def dt2str(time: PartialDateTime) -> str: return cube_slice -def clip_timerange(cube, timerange): +def clip_timerange(cube: Cube, timerange: str) -> Cube: """Extract time range with a resolution up to seconds. Parameters ---------- - cube : iris.cube.Cube + cube: Input cube. - timerange : str + timerange: str Time range in ISO 8601 format. Returns @@ -240,12 +247,10 @@ def clip_timerange(cube, timerange): ------ ValueError Time ranges are outside the cube's time limits. 
- """ - start_date = timerange.split('/')[0] - start_date = _parse_start_date(start_date) - end_date = timerange.split('/')[1] - end_date = _parse_end_date(end_date) + """ + start_date = _parse_start_date(timerange.split('/')[0]) + end_date = _parse_end_date(timerange.split('/')[1]) if isinstance(start_date, isodate.duration.Duration): start_date = _duration_to_date(start_date, end_date, sign=-1) @@ -280,14 +285,14 @@ def clip_timerange(cube, timerange): return _extract_datetime(cube, t_1, t_2) -def extract_season(cube, season): +def extract_season(cube: Cube, season: str) -> Cube: """Slice cube to get only the data belonging to a specific season. Parameters ---------- - cube: iris.cube.Cube + cube: Original data - season: str + season: Season to extract. Available: DJF, MAM, JJA, SON and all sequentially correct combinations: e.g. JJAS @@ -299,7 +304,8 @@ def extract_season(cube, season): Raises ------ ValueError - if requested season is not present in the cube + Requested season is not present in the cube. + """ season = season.upper() @@ -334,25 +340,26 @@ def extract_season(cube, season): return result -def extract_month(cube, month): +def extract_month(cube: Cube, month: int) -> Cube: """Slice cube to get only the data belonging to a specific month. Parameters ---------- - cube: iris.cube.Cube + cube: Original data - month: int - Month to extract as a number from 1 to 12 + month: + Month to extract as a number from 1 to 12. Returns ------- iris.cube.Cube - data cube for specified month. + Cube for specified month. Raises ------ ValueError - if requested month is not present in the cube + Requested month is not present in the cube. + """ if month not in range(1, 13): raise ValueError('Please provide a month number between 1 and 12.') @@ -366,17 +373,17 @@ def extract_month(cube, month): return result -def get_time_weights(cube): +def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: """Compute the weighting of the time axis. 
Parameters ---------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- - numpy.array + np.ndarray or da.core.Array Array of time weights for averaging. """ time = cube.coord('time') @@ -425,28 +432,27 @@ def _aggregate_time_fx(result_cube, source_cube): ancillary_dims) -def hourly_statistics(cube, hours, operator='mean'): +def hourly_statistics(cube: Cube, hours: int, operator: str = 'mean') -> Cube: """Compute hourly statistics. Chunks time in x hours periods and computes statistics over them. Parameters ---------- - cube: iris.cube.Cube - input cube. - - hours: int - Number of hours per period. Must be a divisor of 24 - (1, 2, 3, 4, 6, 8, 12) - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', 'max' + cube: + Input cube. + hours: + Number of hours per period. Must be a divisor of 24, i.e., (1, 2, 3, 4, + 6, 8, 12). + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Hourly statistics cube + Hourly statistics cube. + """ if not cube.coords('hour_group'): iris.coord_categorisation.add_categorised_coord( @@ -471,25 +477,24 @@ def hourly_statistics(cube, hours, operator='mean'): return result -def daily_statistics(cube, operator='mean'): +def daily_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute daily statistics. Chunks time in daily periods and computes statistics over them; Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Daily statistics cube + Daily statistics cube. 
+ """ if not cube.coords('day_of_year'): iris.coord_categorisation.add_day_of_year(cube, 'time') @@ -504,25 +509,24 @@ def daily_statistics(cube, operator='mean'): return result -def monthly_statistics(cube, operator='mean'): +def monthly_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute monthly statistics. Chunks time in monthly periods and computes statistics over them; Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Monthly statistics cube + Monthly statistics cube. + """ if not cube.coords('month_number'): iris.coord_categorisation.add_month_number(cube, 'time') @@ -535,24 +539,23 @@ def monthly_statistics(cube, operator='mean'): return result -def seasonal_statistics(cube, - operator='mean', - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def seasonal_statistics( + cube: Cube, + operator: str = 'mean', + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute seasonal statistics. Chunks time seasons and computes statistics over them. Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' - - seasons: list or tuple of str, optional + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. + seasons: optional Seasons to build. Available: ('DJF', 'MAM', 'JJA', SON') (default) and all sequentially correct combinations holding every month of a year: e.g. 
('JJAS','ONDJFMAM'), or less in case of prior season @@ -561,7 +564,8 @@ def seasonal_statistics(cube, Returns ------- iris.cube.Cube - Seasonal statistic cube + Seasonal statistic cube. + """ seasons = tuple(sea.upper() for sea in seasons) @@ -595,18 +599,19 @@ def seasonal_statistics(cube, # Ranging on [29, 31] days makes this calendar-independent # the only season this could not work is 'F' but this raises an # ValueError - def spans_full_season(cube): + def spans_full_season(cube: Cube) -> list[bool]: """Check for all month present in the season. Parameters ---------- - cube: iris.cube.Cube - input cube. + cube: + Input cube. Returns ------- - bool - truth statement if time bounds are within (month*29, month*31) + list[bool] + Truth statements if time bounds are within (month*29, month*31) + """ time = cube.coord('time') num_days = [(tt.bounds[0, 1] - tt.bounds[0, 0]) for tt in time] @@ -622,7 +627,7 @@ def spans_full_season(cube): return result -def annual_statistics(cube, operator='mean'): +def annual_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute annual statistics. Note that this function does not weight the annual mean if @@ -631,18 +636,17 @@ def annual_statistics(cube, operator='mean'): Parameters ---------- - cube: iris.cube.Cube - input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + cube: + Input cube. + operator: optional + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Annual statistics cube + Annual statistics cube. + """ # TODO: Add weighting in time dimension. 
See iris issue 3290 # https://github.com/SciTools/iris/issues/3290 @@ -656,7 +660,7 @@ def annual_statistics(cube, operator='mean'): return result -def decadal_statistics(cube, operator='mean'): +def decadal_statistics(cube: Cube, operator: str = 'mean') -> Cube: """Compute decadal statistics. Note that this function does not weight the decadal mean if @@ -665,18 +669,17 @@ def decadal_statistics(cube, operator='mean'): Parameters ---------- - cube: iris.cube.Cube - input cube. - + cube: + Input cube. operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + The operation. Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. Returns ------- iris.cube.Cube - Decadal statistics cube + Decadal statistics cube. + """ # TODO: Add weighting in time dimension. See iris issue 3290 # https://github.com/SciTools/iris/issues/3290 @@ -697,10 +700,12 @@ def get_decade(coord, value): return result -def climate_statistics(cube, - operator='mean', - period='full', - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def climate_statistics( + cube: Cube, + operator: str = 'mean', + period: str = 'full', + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute climate statistics with the specified granularity. Computes statistics for the whole dataset. It is possible to get them for @@ -708,20 +713,16 @@ def climate_statistics(cube, Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - - operator: str, optional - Select operator to apply. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms'. - - period: str, optional - Period to compute the statistic over. - Available periods: 'full', 'season', 'seasonal', 'monthly', 'month', - 'mon', 'daily', 'day', 'hourly', 'hour', 'hr'. - - seasons: list or tuple of str, optional + operator: optional + The operation. 
Allowed options: `mean`, `median`, `min`, `max`, + `std_dev`, `sum`, `variance`, `rms`. + period: optional + Period to compute the statistic over. Available periods: `full`, + `season`, `seasonal`, `monthly`, `month`, `mon`, `daily`, `day`, + `hourly`, `hour`, `hr`. + seasons: Seasons to use if needed. Defaults to ('DJF', 'MAM', 'JJA', 'SON'). Returns @@ -756,7 +757,7 @@ def climate_statistics(cube, iris.util.promote_aux_coord_to_dim_coord(clim_cube, clim_coord.name()) else: - clim_cube = iris.cube.CubeList( + clim_cube = CubeList( clim_cube.slices_over(clim_coord.name())).merge_cube() cube.remove_coord(clim_coord) @@ -766,14 +767,17 @@ def climate_statistics(cube, "climate_statistics changed dtype from " "%s to %s, changing back", original_dtype, new_dtype) clim_cube.data = clim_cube.core_data().astype(original_dtype) + return clim_cube -def anomalies(cube, - period, - reference=None, - standardize=False, - seasons=('DJF', 'MAM', 'JJA', 'SON')): +def anomalies( + cube: Cube, + period: str, + reference: Optional[dict] = None, + standardize: bool = False, + seasons: Iterable[str] = ('DJF', 'MAM', 'JJA', 'SON'), +) -> Cube: """Compute anomalies using a mean with the specified granularity. Computes anomalies based on hourly, daily, monthly, seasonal or yearly @@ -781,23 +785,19 @@ def anomalies(cube, Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - - period: str - Period to compute the statistic over. - Available periods: 'full', 'season', 'seasonal', 'monthly', 'month', - 'mon', 'daily', 'day', 'hourly', 'hour', 'hr'. - - reference: list int, optional, default: None - Period of time to use a reference, as needed for the 'extract_time' - preprocessor function. If ``None``, all available data is used as a - reference. - - standardize: bool, optional + period: + Period to compute the statistic over. Available periods: `full`, + `season`, `seasonal`, `monthly`, `month`, `mon`, `daily`, `day`, + `hourly`, `hour`, `hr`. 
+    reference: optional
+        Period of time to use as a reference, as needed for the
+        :func:`~esmvalcore.preprocessor.extract_time` preprocessor function.
+        If ``None``, all available data is used as a reference.
+    standardize: optional
         If ``True`` standardized anomalies are calculated.
-
-    seasons: list or tuple of str, optional
+    seasons: optional
         Seasons to use if needed. Defaults to ('DJF', 'MAM', 'JJA', 'SON').
 
     Returns
@@ -891,7 +891,7 @@ def _get_period_coord(cube, period, seasons):
     raise ValueError(f"Period '{period}' not supported")
 
 
-def regrid_time(cube, frequency):
+def regrid_time(cube: Cube, frequency: str) -> Cube:
     """Align time axis for cubes so they can be subtracted.
 
     Operations on time units, time points and auxiliary
@@ -903,15 +903,16 @@ def regrid_time(cube, frequency):
 
     Parameters
     ----------
-    cube: iris.cube.Cube
-        input cube.
-    frequency: str
-        data frequency: mon, day, 1hr, 3hr or 6hr
+    cube:
+        Input cube.
+    frequency:
+        Data frequency: `mon`, `day`, `1hr`, `3hr` or `6hr`.
 
     Returns
     -------
     iris.cube.Cube
-        cube with converted time axis and units.
+        Cube with converted time axis and units.
+
     """
     # standardize time points
     coord = cube.coord('time')
@@ -1000,11 +1001,13 @@ def low_pass_weights(window, cutoff):
     return weights[1:-1]
 
 
-def timeseries_filter(cube,
-                      window,
-                      span,
-                      filter_type='lowpass',
-                      filter_stats='sum'):
+def timeseries_filter(
+    cube: Cube,
+    window: int,
+    span: int,
+    filter_type: str = 'lowpass',
+    filter_stats: str = 'sum',
+) -> Cube:
     """Apply a timeseries filter.
 
     Method borrowed from `iris example
@@ -1018,33 +1021,34 @@ def timeseries_filter(cube,
 
     Parameters
     ----------
-    cube: iris.cube.Cube
-        input cube.
-    window: int
+    cube:
+        Input cube.
+    window:
         The length of the filter window (in units of cube time coordinate).
-    span: int
+    span:
         Number of months/days (depending on data frequency) on which
         weights should be computed e.g. 2-yearly: span = 24 (2 x 12 months).
         Span should have same units as cube time coordinate.
- filter_type: str, optional + filter_type: optional Type of filter to be applied; default 'lowpass'. Available types: 'lowpass'. - filter_stats: str, optional - Type of statistic to aggregate on the rolling window; default 'sum'. - Available operators: 'mean', 'median', 'std_dev', 'sum', 'min', - 'max', 'rms' + filter_stats: optional + Type of statistic to aggregate on the rolling window; default: `sum`. + Allowed options: `mean`, `median`, `min`, `max`, `std_dev`, `sum`, + `variance`, `rms`. Returns ------- iris.cube.Cube - cube time-filtered using 'rolling_window'. + Cube time-filtered using 'rolling_window'. Raises ------ iris.exceptions.CoordinateNotFoundError: Cube does not have time coordinate. NotImplementedError: - If filter_type is not implemented. + `filter_type` is not implemented. + """ try: cube.coord('time') @@ -1075,7 +1079,7 @@ def timeseries_filter(cube, return cube -def resample_hours(cube, interval, offset=0): +def resample_hours(cube: Cube, interval: int, offset: int = 0) -> Cube: """Convert x-hourly data to y-hourly by eliminating extra timesteps. Convert x-hourly data to y-hourly (y > x) by eliminating the extra @@ -1094,11 +1098,11 @@ def resample_hours(cube, interval, offset=0): Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - interval: int + interval: The period (hours) of the desired data. - offset: int, optional + offset: optional The firs hour (hours) of the desired data. Returns @@ -1110,6 +1114,7 @@ def resample_hours(cube, interval, offset=0): ------ ValueError: The specified frequency is not a divisor of 24. + """ allowed_intervals = (1, 2, 3, 4, 6, 12) if interval not in allowed_intervals: @@ -1136,7 +1141,12 @@ def resample_hours(cube, interval, offset=0): return cube -def resample_time(cube, month=None, day=None, hour=None): +def resample_time( + cube: Cube, + month: Optional[int] = None, + day: Optional[int] = None, + hour: Optional[int] = None, +) -> Cube: """Change frequency of data by resampling it. 
Converts data from one frequency to another by extracting the timesteps @@ -1160,19 +1170,20 @@ def resample_time(cube, month=None, day=None, hour=None): Parameters ---------- - cube: iris.cube.Cube + cube: Input cube. - month: int, optional - Month to extract - day: int, optional - Day to extract - hour: int, optional - Hour to extract + month: optional + Month to extract. + day: optional + Day to extract. + hour: optional + Hour to extract. Returns ------- iris.cube.Cube Cube with the new frequency. + """ time = cube.coord('time') dates = time.units.num2date(time.points) From b6fd49d6391f4db4766d8c81f01f6246e5e8fbf0 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 14:13:44 +0200 Subject: [PATCH 09/13] Added type hints to volume preprocessors --- esmvalcore/preprocessor/_volume.py | 96 +++++++++++++++++------------- 1 file changed, 56 insertions(+), 40 deletions(-) diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 44e7c9e65e..869873f91f 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -3,7 +3,10 @@ Allows for selecting data subsets using certain volume bounds; selecting depth or height regions; constructing volumetric averages; """ +from __future__ import annotations + import logging +from typing import Iterable, Sequence import dask.array as da import iris @@ -20,12 +23,12 @@ def extract_volume( - cube, - z_min, - z_max, - interval_bounds='open', - nearest_value=False -): + cube: Cube, + z_min: float, + z_max: float, + interval_bounds: str = 'open', + nearest_value: bool = False, +) -> Cube: """Subset a cube based on a range of values in the z-coordinate. Function that subsets a cube on a box of (z_min, z_max), @@ -41,22 +44,23 @@ def extract_volume( Parameters ---------- - cube: iris.cube.Cube - input cube. - z_min: float - minimum depth to extract. - z_max: float - maximum depth to extract. 
- interval_bounds: str - sets left bound of the interval to either 'open', 'closed', + cube: + Input cube. + z_min: + Minimum depth to extract. + z_max: + Maximum depth to extract. + interval_bounds: + Sets left bound of the interval to either 'open', 'closed', 'left_closed' or 'right_closed'. - nearest_value: bool - extracts considering the nearest value of z-coord to z_min and z_max. + nearest_value: + Extracts considering the nearest value of z-coord to z_min and z_max. Returns ------- iris.cube.Cube z-coord extracted cube. + """ if z_min > z_max: # minimum is below maximum, so switch them around @@ -285,13 +289,18 @@ def depth_integration(cube: Cube) -> Cube: ------- iris.cube.Cube Collapsed cube. + """ result = axis_statistics(cube, axis='z', operator='sum') result.rename('Depth_integrated_' + str(cube.name())) return result -def extract_transect(cube, latitude=None, longitude=None): +def extract_transect( + cube: Cube, + latitude: None | float | Iterable[float] = None, + longitude: None | float | Iterable[float] = None, +) -> Cube: """Extract data along a line of constant latitude or longitude. Both arguments, latitude and longitude, are treated identically. @@ -315,29 +324,30 @@ def extract_transect(cube, latitude=None, longitude=None): Parameters ---------- - cube: iris.cube.Cube - input cube. - latitude: None, float or [float, float], optional - transect latiude or range. - longitude: None, float or [float, float], optional - transect longitude or range. + cube: + Input cube. + latitude: optional + Transect latitude or range. + longitude: optional + Transect longitude or range. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ ValueError - slice extraction not implemented for irregular grids. + Slice extraction not implemented for irregular grids. ValueError - latitude and longitude are both floats or lists; not allowed - to slice on both axes at the same time. 
+ Latitude and longitude are both floats or lists; not allowed to slice + on both axes at the same time. + """ # ### coord_dim2 = False - second_coord_range = False + second_coord_range: None | list = None lats = cube.coord('latitude') lons = cube.coord('longitude') @@ -375,13 +385,18 @@ def extract_transect(cube, latitude=None, longitude=None): slices = [slice(None) for i in cube.shape] slices[coord_dim] = coord_index - if second_coord_range: + if second_coord_range is not None: slices[coord_dim2] = slice(second_coord_range[0], second_coord_range[1]) return cube[tuple(slices)] -def extract_trajectory(cube, latitudes, longitudes, number_points=2): +def extract_trajectory( + cube: Cube, + latitudes: Sequence[float], + longitudes: Sequence[float], + number_points: int = 2, +) -> Cube: """Extract data along a trajectory. latitudes and longitudes are the pairs of coordinates for two points. @@ -401,24 +416,25 @@ def extract_trajectory(cube, latitudes, longitudes, number_points=2): Parameters ---------- - cube: iris.cube.Cube - input cube. - latitudes: list - list of latitude coordinates (floats). - longitudes: list - list of longitude coordinates (floats). - number_points: int - number of points to extrapolate (optional). + cube: + Input cube. + latitudes: + Latitude coordinates. + longitudes: + Longitude coordinates. + number_points: optional + Number of points to extrapolate. Returns ------- iris.cube.Cube - collapsed cube. + Collapsed cube. Raises ------ ValueError - if latitude and longitude have different dimensions. + Latitude and longitude have different dimensions. 
+ """ from iris.analysis.trajectory import interpolate From 1ebe1df13b4ff75fa0150aec1f07af7946404231 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 15:11:06 +0200 Subject: [PATCH 10/13] Smarter weights for climate_statistics --- esmvalcore/preprocessor/_time.py | 27 +++++++++------ tests/unit/preprocessor/_time/test_time.py | 38 +++++++++++++++++++--- 2 files changed, 50 insertions(+), 15 deletions(-) diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 2d543e8439..37911987f0 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -18,6 +18,7 @@ import iris.util import isodate import numpy as np +from iris.coords import AuxCoord from iris.cube import Cube, CubeList from iris.time import PartialDateTime @@ -385,6 +386,7 @@ def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: ------- np.ndarray or da.core.Array Array of time weights for averaging. + """ time = cube.coord('time') coord_dims = cube.coord_dims('time') @@ -394,8 +396,8 @@ def get_time_weights(cube: Cube) -> np.ndarray | da.core.Array: if len(coord_dims) > 1: raise ValueError( f"Weighted statistical operations are not supported for " - f"{len(coord_dims):d}D time coordinates, expected " - f"0D or 1D") + f"{len(coord_dims):d}D time coordinates, expected 0D or 1D" + ) # Extract 1D time weights (= lengths of time intervals) time_weights = time.core_bounds()[:, 1] - time.core_bounds()[:, 0] @@ -737,14 +739,17 @@ def climate_statistics( if period in ('full', ): operator_method = get_iris_analysis_operation(operator) if operator_accept_weights(operator): - time_weights = get_time_weights(cube) - if time_weights.min() == time_weights.max(): - # No weighting needed. 
- clim_cube = cube.collapsed('time', operator_method) - else: - clim_cube = cube.collapsed('time', - operator_method, - weights=time_weights) + time_weights_coord = AuxCoord( + get_time_weights(cube), + long_name='time_weights', + units=cube.coord('time').units, + ) + cube.add_aux_coord(time_weights_coord, cube.coord_dims('time')) + clim_cube = cube.collapsed( + 'time', + operator_method, + weights=time_weights_coord, + ) else: clim_cube = cube.collapsed('time', operator_method) else: @@ -1063,6 +1068,8 @@ def timeseries_filter( ] if filter_type in supported_filters: if filter_type == 'lowpass': + # These weights sum to one and are dimensionless (-> we do NOT need + # to consider units for sums) wgts = low_pass_weights(window, 1. / span) else: raise NotImplementedError( diff --git a/tests/unit/preprocessor/_time/test_time.py b/tests/unit/preprocessor/_time/test_time.py index ab9e086532..b847a2972f 100644 --- a/tests/unit/preprocessor/_time/test_time.py +++ b/tests/unit/preprocessor/_time/test_time.py @@ -532,7 +532,11 @@ def _create_cube(data, times, bounds): standard_name='time', units=Unit('days since 1950-01-01', calendar='gregorian')) - cube = iris.cube.Cube(data, dim_coords_and_dims=[(time, 0)]) + cube = iris.cube.Cube( + data, + dim_coords_and_dims=[(time, 0)], + units='kg m-2 s-1' + ) return cube def test_time_mean(self): @@ -545,6 +549,7 @@ def test_time_mean(self): result = climate_statistics(cube, operator='mean') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_mean_uneven(self): """Test for time average of a 1D field with uneven time boundaries.""" @@ -556,6 +561,7 @@ def test_time_mean_uneven(self): result = climate_statistics(cube, operator='mean') expected = np.array([4.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_mean_365_day(self): """Test for time avg of a realistic time 
axis and 365 day calendar.""" @@ -568,6 +574,7 @@ def test_time_mean_365_day(self): result = climate_statistics(cube, operator='mean') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_sum(self): """Test for time sum of a 1D field.""" @@ -577,8 +584,9 @@ def test_time_sum(self): cube = self._create_cube(data, times, bounds) result = climate_statistics(cube, operator='sum') - expected = np.array([4.], dtype=np.float32) + expected = np.array([120.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_weighted(self): """Test for time sum of a 1D field.""" @@ -590,6 +598,7 @@ def test_time_sum_weighted(self): result = climate_statistics(cube, operator='sum') expected = np.array([74.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_uneven(self): """Test for time sum of a 1D field with uneven time boundaries.""" @@ -601,6 +610,7 @@ def test_time_sum_uneven(self): result = climate_statistics(cube, operator='sum') expected = np.array([16.0], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_time_sum_365_day(self): """Test for time sum of a realistic time axis and 365 day calendar.""" @@ -614,6 +624,7 @@ def test_time_sum_365_day(self): result = climate_statistics(cube, operator='sum') expected = np.array([211.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, '86400 kg m-2') def test_season_climatology(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -627,6 +638,7 @@ def test_season_climatology(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.array([1., 1., 1.], dtype=np.float32) assert_array_equal(result.data, expected) + 
self.assertEqual(result.units, 'kg m-2 s-1') def test_custom_season_climatology(self): """Test for time avg of a realisitc time axis and 365 day calendar.""" @@ -643,6 +655,7 @@ def test_custom_season_climatology(self): seasons=('jfmamj', 'jasond')) expected = np.array([1., 1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_monthly(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -656,6 +669,7 @@ def test_monthly(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.ones((6, ), dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_day(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -669,6 +683,7 @@ def test_day(self): result = climate_statistics(cube, operator='mean', period=period) expected = np.array([1, 1, 1], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_hour(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -684,6 +699,7 @@ def test_hour(self): assert_array_equal(result.data, expected) expected_hours = [0, 1, 2] assert_array_equal(result.coord('hour').points, expected_hours) + self.assertEqual(result.units, 'kg m-2 s-1') def test_period_not_supported(self): """Test for time avg of a realistic time axis and 365 day calendar.""" @@ -706,6 +722,7 @@ def test_time_max(self): result = climate_statistics(cube, operator='max') expected = np.array([2.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_min(self): """Test for time min of a 1D field.""" @@ -717,6 +734,7 @@ def test_time_min(self): result = climate_statistics(cube, operator='min') expected = np.array([0.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 
s-1') def test_time_median(self): """Test for time meadian of a 1D field.""" @@ -728,6 +746,7 @@ def test_time_median(self): result = climate_statistics(cube, operator='median') expected = np.array([1.], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_rms(self): """Test for time rms of a 1D field.""" @@ -739,6 +758,7 @@ def test_time_rms(self): result = climate_statistics(cube, operator='rms') expected = np.array([(5 / 3)**0.5], dtype=np.float32) assert_array_equal(result.data, expected) + self.assertEqual(result.units, 'kg m-2 s-1') def test_time_dependent_fx(self): """Test average time dimension in time-dependent fx vars.""" @@ -768,6 +788,7 @@ def test_time_dependent_fx(self): self.assertEqual(result.cell_measure('ocean_volume').ndim, 2) self.assertEqual( result.ancillary_variable('land_ice_area_fraction').ndim, 2) + self.assertEqual(result.units, 'kg m-2 s-1') class TestSeasonalStatistics(tests.Test): @@ -1844,7 +1865,11 @@ def _make_cube(): coord_system=coord_sys) lons = get_lon_coord() coords_spec4 = [(time, 0), (zcoord, 1), (lats, 2), (lons, 3)] - cube1 = iris.cube.Cube(data2, dim_coords_and_dims=coords_spec4) + cube1 = iris.cube.Cube( + data2, + dim_coords_and_dims=coords_spec4, + units='kg m-2 s-1', + ) return cube1 @@ -1928,12 +1953,13 @@ def test_climate_statistics_0d_time_1d_lon(): lons = get_lon_coord() cube = iris.cube.Cube([[1.0, -1.0, 42.0]], var_name='x', - units='K', + units='K day-1', dim_coords_and_dims=[(time, 0), (lons, 1)]) new_cube = climate_statistics(cube, operator='sum', period='full') assert cube.shape == (1, 3) assert new_cube.shape == (3, ) - np.testing.assert_allclose(new_cube.data, [1.0, -1.0, 42.0]) + np.testing.assert_allclose(new_cube.data, [2.0, -2.0, 84.0]) + assert new_cube.units == 'K' def test_climate_statistics_complex_cube_sum(): @@ -1943,6 +1969,7 @@ def test_climate_statistics_complex_cube_sum(): assert cube.shape == (2, 1, 1, 3) assert 
new_cube.shape == (1, 1, 3) np.testing.assert_allclose(new_cube.data, [[[45.0, 45.0, 45.0]]]) + assert new_cube.units == '86400 kg m-2' def test_climate_statistics_complex_cube_mean(): @@ -1952,6 +1979,7 @@ def test_climate_statistics_complex_cube_mean(): assert cube.shape == (2, 1, 1, 3) assert new_cube.shape == (1, 1, 3) np.testing.assert_allclose(new_cube.data, [[[1.0, 1.0, 1.0]]]) + assert new_cube.units == 'kg m-2 s-1' class TestResampleHours(tests.Test): From 0e93e4cb2161f0f4d2d1cc22b57ef2d924514d31 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 15:20:25 +0200 Subject: [PATCH 11/13] Added doc --- esmvalcore/preprocessor/_area.py | 4 ++-- esmvalcore/preprocessor/_time.py | 11 +++++++++++ esmvalcore/preprocessor/_volume.py | 9 +++++++-- 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index 5e2040dcb9..e25e761ce2 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -304,8 +304,8 @@ def area_statistics(cube: Cube, operator: str) -> Cube: | `rms` | Area weighted root mean square | +------------+--------------------------------------------------+ - Note that for sums, the units of the resulting cube will be multiplied by - m:math:`^2`. + Note that for area-weighted sums, the units of the resulting cube will be + multiplied by m:math:`^2`. Parameters ---------- diff --git a/esmvalcore/preprocessor/_time.py b/esmvalcore/preprocessor/_time.py index 37911987f0..265004a8cf 100644 --- a/esmvalcore/preprocessor/_time.py +++ b/esmvalcore/preprocessor/_time.py @@ -713,6 +713,13 @@ def climate_statistics( Computes statistics for the whole dataset. It is possible to get them for the full period or with the data grouped by hour, day, month or season. + Note + ---- + The `mean`, `sum` and `rms` operations over the `full` period are weighted + by the time coordinate, i.e., the length of the time intervals. 
For `sum`, + the units of the resulting cube will be multiplied by corresponding time + units (e.g., days). + Parameters ---------- cube: @@ -736,6 +743,7 @@ def climate_statistics( original_dtype = cube.dtype period = period.lower() + # Use Cube.collapsed when full period is requested if period in ('full', ): operator_method = get_iris_analysis_operation(operator) if operator_accept_weights(operator): @@ -752,6 +760,8 @@ def climate_statistics( ) else: clim_cube = cube.collapsed('time', operator_method) + + # Use Cube.aggregated_by for other periods else: clim_coord = _get_period_coord(cube, period, seasons) operator = get_iris_analysis_operation(operator) @@ -766,6 +776,7 @@ def climate_statistics( clim_cube.slices_over(clim_coord.name())).merge_cube() cube.remove_coord(clim_coord) + # Make sure that original dtype is preserved new_dtype = clim_cube.dtype if original_dtype != new_dtype: logger.debug( diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 869873f91f..4e9292409a 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -218,8 +218,13 @@ def volume_statistics(cube: Cube, operator: str) -> Cube: def axis_statistics(cube: Cube, axis: str, operator: str) -> Cube: """Perform statistics along a given axis. - Operates over an axis direction. If weights are required, they are computed - using the coordinate bounds. + Operates over an axis direction. + + Note + ---- + The `mean`, `sum` and `rms` operations are weighted by the corresponding + coordinate bounds. For `sum`, the units of the resulting cube will be + multiplied by corresponding coordinate units. 
Arguments --------- From 644415ef06ed7fbc5d92ac6a4f326c6563afdaa4 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 15:32:29 +0200 Subject: [PATCH 12/13] Added further notes to doc --- doc/recipe/preprocessor.rst | 50 ++++++++++++++++++++---------- esmvalcore/preprocessor/_volume.py | 3 +- 2 files changed, 35 insertions(+), 18 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 44eb9d0dd4..0b353e479f 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -1386,7 +1386,7 @@ statistics. Parameters: * operator: operation to apply. Accepted values are 'mean', 'median', - 'std_dev', 'min', 'max', 'sum' and 'rms'. Default is 'mean' + 'std_dev', 'variance', 'min', 'max', 'sum' and 'rms'. Default is 'mean'. * period: define the granularity of the statistics: get values for the full period, for each month, day of year or hour of day. @@ -1396,6 +1396,12 @@ Parameters: * seasons: if period 'seasonal' or 'season' allows to set custom seasons. Default is '[DJF, MAM, JJA, SON]' +.. note:: + The 'mean', 'sum' and 'rms' operations over the 'full' period are weighted + by the time coordinate, i.e., the length of the time intervals. + For 'sum', the units of the resulting cube are multiplied by corresponding + time units (e.g., days). + Examples: * Monthly climatology: @@ -1875,23 +1881,26 @@ See also :func:`esmvalcore.preprocessor.meridional_means`. ``area_statistics`` ------------------- -This function calculates the average value over a region - weighted by the cell -areas of the region. This function takes the argument, ``operator``: the name -of the operation to apply. +This function calculates statistics over a region. +It takes one argument, ``operator``, which is the name of the operation to +apply. This function can be used to apply several different operations in the -horizontal plane: mean, standard deviation, median, variance, minimum, maximum and root mean square. 
+horizontal plane: mean, sum, standard deviation, median, variance, minimum, +maximum and root mean square. +The operations mean, sum and root mean square are area weighted. +For sums, the units of the resulting cubes are multiplied by m:math:`^2`. -Note that this function is applied over the entire dataset. If only a specific -region, depth layer or time period is required, then those regions need to be -removed using other preprocessor operations in advance. +Note that this function is applied over the entire dataset. +If only a specific region, depth layer or time period is required, then those +regions need to be removed using other preprocessor operations in advance. -This function requires a cell area `cell measure`_, unless the coordinates of the -input data are regular 1D latitude and longitude coordinates so the cell areas -can be computed. -The required supplementary variable, either ``areacella`` for atmospheric variables -or ``areacello`` for ocean variables, can be attached to the main dataset -as described in :ref:`supplementary_variables`. +This function requires a cell area `cell measure`_, unless the coordinates of +the input data are regular 1D latitude and longitude coordinates so the cell +areas can be computed. +The required supplementary variable, either ``areacella`` for atmospheric +variables or ``areacello`` for ocean variables, can be attached to the main +dataset as described in :ref:`supplementary_variables`. .. deprecated:: 2.8.0 The optional ``fx_variables`` argument specifies the fx variables that the user @@ -2023,15 +2032,22 @@ Takes arguments: be performed must be one-dimensional, as multidimensional coordinates are not supported in this preprocessor. + The 'mean', 'sum' and 'rms' operations are weighted by the corresponding + coordinate bounds. + For 'sum', the units of the resulting cube will be multiplied by + corresponding coordinate units. + See also :func:`esmvalcore.preprocessor.axis_statistics`. 
``depth_integration`` --------------------- -This function integrates over the depth dimension. This function does a -weighted sum along the `z`-coordinate, and removes the `z` direction of the -output cube. This preprocessor takes no arguments. +This function integrates over the depth dimension. +This function does a weighted sum along the `z`-coordinate, and removes the `z` +direction of the output cube. +This preprocessor takes no arguments. +The units of the resulting cube are multiplied by the `z`-coordinate units. See also :func:`esmvalcore.preprocessor.depth_integration`. diff --git a/esmvalcore/preprocessor/_volume.py b/esmvalcore/preprocessor/_volume.py index 4e9292409a..08fde09828 100644 --- a/esmvalcore/preprocessor/_volume.py +++ b/esmvalcore/preprocessor/_volume.py @@ -283,7 +283,8 @@ def depth_integration(cube: Cube) -> Cube: Requires a 3D cube. The z-coordinate integration is calculated by taking the sum in the z direction of the cell contents multiplied by the cell - thickness. + thickness. The units of the resulting cube are multiplied by the + z-coordinate units. Arguments --------- From b361aee8ba0ba36b3d83f3b38680cf60f02c5737 Mon Sep 17 00:00:00 2001 From: Manuel Schlund Date: Mon, 17 Jul 2023 16:11:12 +0200 Subject: [PATCH 13/13] Fixed doc --- doc/recipe/preprocessor.rst | 2 +- esmvalcore/preprocessor/_area.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/recipe/preprocessor.rst b/doc/recipe/preprocessor.rst index 0b353e479f..cd23e8d2c1 100644 --- a/doc/recipe/preprocessor.rst +++ b/doc/recipe/preprocessor.rst @@ -1889,7 +1889,7 @@ This function can be used to apply several different operations in the horizontal plane: mean, sum, standard deviation, median, variance, minimum, maximum and root mean square. The operations mean, sum and root mean square are area weighted. -For sums, the units of the resulting cubes are multiplied by m:math:`^2`. +For sums, the units of the resulting cubes are multiplied by m :math:`^2`. 
Note that this function is applied over the entire dataset. If only a specific region, depth layer or time period is required, then those diff --git a/esmvalcore/preprocessor/_area.py b/esmvalcore/preprocessor/_area.py index e25e761ce2..f47c65e081 100644 --- a/esmvalcore/preprocessor/_area.py +++ b/esmvalcore/preprocessor/_area.py @@ -305,7 +305,7 @@ def area_statistics(cube: Cube, operator: str) -> Cube: +------------+--------------------------------------------------+ Note that for area-weighted sums, the units of the resulting cube will be - multiplied by m:math:`^2`. + multiplied by m :math:`^2`. Parameters ----------