Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allowed ignoring scalar coordinates in multi_model_statistics #1934

Merged
merged 4 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/recipe/preprocessor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,13 @@ calendars, (sub-)daily data with different calendars are not supported.
The preprocessor saves both the input single model files as well as the multi-model
results. In case you do not want to keep the single model files, set the
parameter ``keep_input_datasets`` to ``false`` (default value is ``true``).
To ignore different scalar coordinates in the input datasets, use the option
``ignore_scalar_coords: true``.
This is helpful if you encounter a ``ValueError: Multi-model statistics failed
to merge input cubes into a single array`` with ``Coordinates in
cube.aux_coords (scalar) differ``.
Some special scalar coordinates which are expected to differ across cubes (`p0`
and `ptop`) are always removed.

.. code-block:: yaml

Expand All @@ -1049,6 +1056,7 @@ parameter ``keep_input_datasets`` to ``false`` (default value is ``true``).
statistics: [mean, median]
exclude: [NCEP-NCAR-R1]
keep_input_datasets: false
ignore_scalar_coords: true

Multi-model statistics also supports a ``groupby`` argument. You can group by
any dataset key (``project``, ``experiment``, etc.) or a combination of keys in a list. You can
Expand Down
33 changes: 29 additions & 4 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,17 @@ def _verify_groupby(groupby):
def _verify_keep_input_datasets(keep_input_datasets):
if not isinstance(keep_input_datasets, bool):
raise RecipeError(
"Invalid value encountered for `keep_input_datasets`."
f"Must be defined as a boolean. Got {keep_input_datasets}.")
f"Invalid value encountered for `keep_input_datasets`."
f"Must be defined as a boolean (true or false). "
f"Got {keep_input_datasets}.")


def _verify_ignore_scalar_coords(ignore_scalar_coords):
if not isinstance(ignore_scalar_coords, bool):
raise RecipeError(
f"Invalid value encountered for `ignore_scalar_coords`."
f"Must be defined as a boolean (true or false). Got "
f"{ignore_scalar_coords}.")


def _verify_arguments(given, expected):
Expand All @@ -262,7 +271,13 @@ def _verify_arguments(given, expected):

def multimodel_statistics_preproc(settings):
"""Check that the multi-model settings are valid."""
valid_keys = ['span', 'groupby', 'statistics', 'keep_input_datasets']
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', None) # optional, default: overlap
Expand All @@ -280,10 +295,17 @@ def multimodel_statistics_preproc(settings):
keep_input_datasets = settings.get('keep_input_datasets', True)
_verify_keep_input_datasets(keep_input_datasets)

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)


def ensemble_statistics_preproc(settings):
"""Check that the ensemble settings are valid."""
valid_keys = ['statistics', 'span']
valid_keys = [
'ignore_scalar_coords',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', 'overlap') # optional, default: overlap
Expand All @@ -294,6 +316,9 @@ def ensemble_statistics_preproc(settings):
if statistics:
_verify_statistics(statistics, 'ensemble_statistics')

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)


def _check_delimiter(timerange):
if len(timerange) != 2:
Expand Down
80 changes: 58 additions & 22 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def _get_equal_coord_names_metadata(cubes, equal_coords_metadata):
return equal_names_metadata


def _equalise_coordinate_metadata(cubes):
def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
"""Equalise coordinates in cubes (in-place)."""
if not cubes:
return
Expand Down Expand Up @@ -354,11 +354,16 @@ def _equalise_coordinate_metadata(cubes):
# Note: remaining differences will raise an error at a later stage
coord.long_name = None

# Additionally remove specific scalar coordinates which are not
# expected to be equal in the input cubes
scalar_coords_to_remove = ['p0', 'ptop']
# Remove scalar coordinates if desired. In addition, always remove
# specific scalar coordinates which are not expected to be equal in the
# input cubes.
scalar_coords_to_always_remove = ['p0', 'ptop']
valeriupredoi marked this conversation as resolved.
Show resolved Hide resolved
for scalar_coord in cube.coords(dimensions=()):
if scalar_coord.var_name in scalar_coords_to_remove:
remove_coord = (
ignore_scalar_coords or
scalar_coord.var_name in scalar_coords_to_always_remove
)
if remove_coord:
cube.remove_coord(scalar_coord)


Expand Down Expand Up @@ -406,7 +411,7 @@ def _equalise_var_metadata(cubes):
setattr(cube, attr, equal_names_metadata[cube_id][attr])


def _combine(cubes):
def _combine(cubes, ignore_scalar_coords=False):
"""Merge iris cubes into a single big cube with new dimension.

This assumes that all input cubes have the same shape.
Expand All @@ -417,7 +422,9 @@ def _combine(cubes):
equalise_attributes(cubes)
_equalise_var_metadata(cubes)
_equalise_cell_methods(cubes)
_equalise_coordinate_metadata(cubes)
_equalise_coordinate_metadata(
cubes, ignore_scalar_coords=ignore_scalar_coords
)
_equalise_fx_variables(cubes)

for i, cube in enumerate(cubes):
Expand Down Expand Up @@ -479,7 +486,7 @@ def _compute_slices(cubes):


def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
**kwargs):
ignore_scalar_coords=False, **kwargs):
"""Compute statistics one slice at a time."""
_ = [cube.data for cube in cubes] # make sure the cubes' data are realized

Expand All @@ -489,7 +496,10 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
single_model_slices = cubes # scalar cubes
else:
single_model_slices = [cube[chunk] for cube in cubes]
combined_slice = _combine(single_model_slices)
combined_slice = _combine(
single_model_slices,
ignore_scalar_coords=ignore_scalar_coords,
)
with warnings.catch_warnings():
warnings.filterwarnings(
'ignore',
Expand Down Expand Up @@ -542,7 +552,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
return result_cube


def _multicube_statistics(cubes, statistics, span):
def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):
"""Compute statistics over multiple cubes.

Can be used e.g. for ensemble or multi-model statistics.
Expand Down Expand Up @@ -580,6 +590,7 @@ def _multicube_statistics(cubes, statistics, span):

result_cube = _compute_eager(aligned_cubes,
operator=operator,
ignore_scalar_coords=ignore_scalar_coords,
**kwargs)
statistics_cubes[statistic] = result_cube

Expand All @@ -590,16 +601,20 @@ def _multiproduct_statistics(products,
statistics,
output_products,
span=None,
keep_input_datasets=None):
keep_input_datasets=None,
ignore_scalar_coords=False):
"""Compute multi-cube statistics on ESMValCore products.

Extract cubes from products, calculate multicube statistics and
assign the resulting output cubes to the output_products.
"""
cubes = [cube for product in products for cube in product.cubes]
statistics_cubes = _multicube_statistics(cubes=cubes,
statistics=statistics,
span=span)
statistics_cubes = _multicube_statistics(
cubes=cubes,
statistics=statistics,
span=span,
ignore_scalar_coords=ignore_scalar_coords,
)
statistics_products = set()
for statistic, cube in statistics_cubes.items():
statistics_product = output_products[statistic]
Expand All @@ -622,7 +637,8 @@ def multi_model_statistics(products,
statistics,
output_products=None,
groupby=None,
keep_input_datasets=True):
keep_input_datasets=True,
ignore_scalar_coords=False):
"""Compute multi-model statistics.

This function computes multi-model statistics on a list of ``products``,
Expand Down Expand Up @@ -667,10 +683,12 @@ def multi_model_statistics(products,
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. For all other
coordinates, :attr:`~iris.coords.Coord.long_name` is removed,
:attr:`~iris.coords.Coord.attributes` deleted and
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. Please note
that some special scalar coordinates which are expected to differ across
cubes (ancillary coordinates for derived coordinates like `p0` and `ptop`)
are removed as well.
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. Differing
scalar coordinates can be ignored with the option
``ignore_scalar_coords``. Please note that some special scalar
coordinates which are expected to differ across cubes (ancillary
coordinates for derived coordinates like `p0` and `ptop`) are always
removed.

Notes
-----
Expand Down Expand Up @@ -701,6 +719,13 @@ def multi_model_statistics(products,
keep_input_datasets: bool
If True, the output will include the input datasets.
If False, only the computed statistics will be returned.
ignore_scalar_coords: bool
If True, remove any scalar coordinate in the input datasets before
merging the input cubes into the multi-dataset cube. The resulting
multi-dataset cube will have no scalar coordinates (the input datasets
will remain unchanged). If False, scalar coordinates will remain in the
input datasets, which might lead to merge conflicts in case the input
datasets have different scalar coordinates.

Returns
-------
Expand All @@ -719,6 +744,7 @@ def multi_model_statistics(products,
cubes=products,
statistics=statistics,
span=span,
ignore_scalar_coords=ignore_scalar_coords,
)
if all(type(p).__name__ == 'PreprocessorFile' for p in products):
# Avoid circular input: https://stackoverflow.com/q/16964467
Expand All @@ -732,7 +758,8 @@ def multi_model_statistics(products,
statistics=statistics,
output_products=sub_output_products,
span=span,
keep_input_datasets=keep_input_datasets
keep_input_datasets=keep_input_datasets,
ignore_scalar_coords=ignore_scalar_coords,
)

statistics_products |= group_statistics
Expand All @@ -746,7 +773,8 @@ def multi_model_statistics(products,


def ensemble_statistics(products, statistics,
output_products, span='overlap'):
output_products, span='overlap',
ignore_scalar_coords=False):
"""Entry point for ensemble statistics.

An ensemble grouping is performed on the input products.
Expand All @@ -770,6 +798,13 @@ def ensemble_statistics(products, statistics,
Overlap or full; if overlap, statistics are computed on common time-
span; if full, statistics are computed on full time spans, ignoring
missing data.
ignore_scalar_coords: bool
If True, remove any scalar coordinate in the input datasets before
merging the input cubes into the multi-dataset cube. The resulting
multi-dataset cube will have no scalar coordinates (the input datasets
will remain unchanged). If False, scalar coordinates will remain in the
input datasets, which might lead to merge conflicts in case the input
datasets have different scalar coordinates.

Returns
-------
Expand All @@ -788,5 +823,6 @@ def ensemble_statistics(products, statistics,
statistics=statistics,
output_products=output_products,
groupby=ensemble_grouping,
keep_input_datasets=False
keep_input_datasets=False,
ignore_scalar_coords=ignore_scalar_coords,
)
25 changes: 21 additions & 4 deletions tests/integration/recipe/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,26 @@ def test_reference_for_bias_preproc_two_refs():
'statistics': ['wrong'],
'span': 'wrong',
'groupby': 'wrong',
'keep_input_datasets': 'wrong'
'keep_input_datasets': 'wrong',
'ignore_scalar_coords': 'wrong',
}


def test_invalid_multi_model_settings():
valid_keys = ['span', 'groupby', 'statistics', 'keep_input_datasets']
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
with pytest.raises(RecipeError) as rec_err:
check._verify_arguments(INVALID_MM_SETTINGS, valid_keys)
assert str(rec_err.value) == (
"Unexpected keyword argument encountered: wrong_parametre. "
"Valid keywords are: "
"['span', 'groupby', 'statistics', 'keep_input_datasets'].")
"['groupby', 'ignore_scalar_coords', 'keep_input_datasets', "
"'span', 'statistics'].")


def test_invalid_multi_model_statistics():
Expand Down Expand Up @@ -307,7 +315,16 @@ def test_invalid_multi_model_keep_input():
INVALID_MM_SETTINGS['keep_input_datasets'])
assert str(rec_err.value) == (
'Invalid value encountered for `keep_input_datasets`.'
'Must be defined as a boolean. Got wrong.')
'Must be defined as a boolean (true or false). Got wrong.')


def test_invalid_multi_model_ignore_scalar_coords():
    """A non-boolean ``ignore_scalar_coords`` must raise ``RecipeError``."""
    expected_msg = (
        'Invalid value encountered for `ignore_scalar_coords`.'
        'Must be defined as a boolean (true or false). Got wrong.')
    with pytest.raises(RecipeError) as exc_info:
        check._verify_ignore_scalar_coords(
            INVALID_MM_SETTINGS['ignore_scalar_coords'])
    assert str(exc_info.value) == expected_msg


def test_invalid_ensemble_statistics():
Expand Down
Loading