Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allowed ignoring scalar coordinates in multi_model_statistics #1934

Merged
merged 4 commits into from
Feb 17, 2023
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions doc/recipe/preprocessor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,13 @@ calendars, (sub-)daily data with different calendars are not supported.
The preprocessor saves both the input single model files as well as the multi-model
results. In case you do not want to keep the single model files, set the
parameter ``keep_input_datasets`` to ``false`` (default value is ``true``).
To ignore different scalar coordinates in the input datasets, use the option
``ignore_scalar_coords: true``.
This is helpful if you encounter a ``ValueError: Multi-model statistics failed
to merge input cubes into a single array`` with ``Coordinates in
cube.aux_coords (scalar) differ``.
Some special scalar coordinates which are expected to differ across cubes (`p0`
and `ptop`) are always removed.

.. code-block:: yaml

Expand All @@ -1049,6 +1056,7 @@ parameter ``keep_input_datasets`` to ``false`` (default value is ``true``).
statistics: [mean, median]
exclude: [NCEP-NCAR-R1]
keep_input_datasets: false
ignore_scalar_coords: true

Multi-model statistics also supports a ``groupby`` argument. You can group by
any dataset key (``project``, ``experiment``, etc.) or a combination of keys in a list. You can
Expand Down
33 changes: 29 additions & 4 deletions esmvalcore/_recipe/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,17 @@ def _verify_groupby(groupby):
def _verify_keep_input_datasets(keep_input_datasets):
if not isinstance(keep_input_datasets, bool):
raise RecipeError(
"Invalid value encountered for `keep_input_datasets`."
f"Must be defined as a boolean. Got {keep_input_datasets}.")
f"Invalid value encountered for `keep_input_datasets`."
f"Must be defined as a boolean (true or false). "
f"Got {keep_input_datasets}.")


def _verify_ignore_scalar_coords(ignore_scalar_coords):
if not isinstance(ignore_scalar_coords, bool):
raise RecipeError(
f"Invalid value encountered for `ignore_scalar_coords`."
f"Must be defined as a boolean (true or false). Got "
f"{ignore_scalar_coords}.")


def _verify_arguments(given, expected):
Expand All @@ -262,7 +271,13 @@ def _verify_arguments(given, expected):

def multimodel_statistics_preproc(settings):
"""Check that the multi-model settings are valid."""
valid_keys = ['span', 'groupby', 'statistics', 'keep_input_datasets']
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', None) # optional, default: overlap
Expand All @@ -280,10 +295,17 @@ def multimodel_statistics_preproc(settings):
keep_input_datasets = settings.get('keep_input_datasets', True)
_verify_keep_input_datasets(keep_input_datasets)

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)


def ensemble_statistics_preproc(settings):
"""Check that the ensemble settings are valid."""
valid_keys = ['statistics', 'span']
valid_keys = [
'ignore_scalar_coords',
'span',
'statistics',
]
_verify_arguments(settings.keys(), valid_keys)

span = settings.get('span', 'overlap') # optional, default: overlap
Expand All @@ -294,6 +316,9 @@ def ensemble_statistics_preproc(settings):
if statistics:
_verify_statistics(statistics, 'ensemble_statistics')

ignore_scalar_coords = settings.get('ignore_scalar_coords', False)
_verify_ignore_scalar_coords(ignore_scalar_coords)


def _check_delimiter(timerange):
if len(timerange) != 2:
Expand Down
80 changes: 58 additions & 22 deletions esmvalcore/preprocessor/_multimodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -311,7 +311,7 @@ def _get_equal_coord_names_metadata(cubes, equal_coords_metadata):
return equal_names_metadata


def _equalise_coordinate_metadata(cubes):
def _equalise_coordinate_metadata(cubes, ignore_scalar_coords=False):
"""Equalise coordinates in cubes (in-place)."""
if not cubes:
return
Expand Down Expand Up @@ -354,11 +354,16 @@ def _equalise_coordinate_metadata(cubes):
# Note: remaining differences will raise an error at a later stage
coord.long_name = None

# Additionally remove specific scalar coordinates which are not
# expected to be equal in the input cubes
scalar_coords_to_remove = ['p0', 'ptop']
# Remove scalar coordinates if desired. In addition, always remove
# specific scalar coordinates which are not expected to be equal in the
# input cubes.
scalar_coords_to_always_remove = ['p0', 'ptop']
valeriupredoi marked this conversation as resolved.
Show resolved Hide resolved
for scalar_coord in cube.coords(dimensions=()):
if scalar_coord.var_name in scalar_coords_to_remove:
remove_coord = (
ignore_scalar_coords or
scalar_coord.var_name in scalar_coords_to_always_remove
)
if remove_coord:
cube.remove_coord(scalar_coord)


Expand Down Expand Up @@ -406,7 +411,7 @@ def _equalise_var_metadata(cubes):
setattr(cube, attr, equal_names_metadata[cube_id][attr])


def _combine(cubes):
def _combine(cubes, ignore_scalar_coords=False):
"""Merge iris cubes into a single big cube with new dimension.

This assumes that all input cubes have the same shape.
Expand All @@ -417,7 +422,9 @@ def _combine(cubes):
equalise_attributes(cubes)
_equalise_var_metadata(cubes)
_equalise_cell_methods(cubes)
_equalise_coordinate_metadata(cubes)
_equalise_coordinate_metadata(
cubes, ignore_scalar_coords=ignore_scalar_coords
)
_equalise_fx_variables(cubes)

for i, cube in enumerate(cubes):
Expand Down Expand Up @@ -479,7 +486,7 @@ def _compute_slices(cubes):


def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
**kwargs):
ignore_scalar_coords=False, **kwargs):
"""Compute statistics one slice at a time."""
_ = [cube.data for cube in cubes] # make sure the cubes' data are realized

Expand All @@ -489,7 +496,10 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
single_model_slices = cubes # scalar cubes
else:
single_model_slices = [cube[chunk] for cube in cubes]
combined_slice = _combine(single_model_slices)
combined_slice = _combine(
single_model_slices,
ignore_scalar_coords=ignore_scalar_coords,
)
with warnings.catch_warnings():
warnings.filterwarnings(
'ignore',
Expand Down Expand Up @@ -542,7 +552,7 @@ def _compute_eager(cubes: list, *, operator: iris.analysis.Aggregator,
return result_cube


def _multicube_statistics(cubes, statistics, span):
def _multicube_statistics(cubes, statistics, span, ignore_scalar_coords=False):
"""Compute statistics over multiple cubes.

Can be used e.g. for ensemble or multi-model statistics.
Expand Down Expand Up @@ -580,6 +590,7 @@ def _multicube_statistics(cubes, statistics, span):

result_cube = _compute_eager(aligned_cubes,
operator=operator,
ignore_scalar_coords=ignore_scalar_coords,
**kwargs)
statistics_cubes[statistic] = result_cube

Expand All @@ -590,16 +601,20 @@ def _multiproduct_statistics(products,
statistics,
output_products,
span=None,
keep_input_datasets=None):
keep_input_datasets=None,
ignore_scalar_coords=False):
"""Compute multi-cube statistics on ESMValCore products.

Extract cubes from products, calculate multicube statistics and
assign the resulting output cubes to the output_products.
"""
cubes = [cube for product in products for cube in product.cubes]
statistics_cubes = _multicube_statistics(cubes=cubes,
statistics=statistics,
span=span)
statistics_cubes = _multicube_statistics(
cubes=cubes,
statistics=statistics,
span=span,
ignore_scalar_coords=ignore_scalar_coords,
)
statistics_products = set()
for statistic, cube in statistics_cubes.items():
statistics_product = output_products[statistic]
Expand All @@ -622,7 +637,8 @@ def multi_model_statistics(products,
statistics,
output_products=None,
groupby=None,
keep_input_datasets=True):
keep_input_datasets=True,
ignore_scalar_coords=False):
"""Compute multi-model statistics.

This function computes multi-model statistics on a list of ``products``,
Expand Down Expand Up @@ -667,10 +683,12 @@ def multi_model_statistics(products,
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. For all other
coordinates, :attr:`~iris.coords.Coord.long_name` is removed,
:attr:`~iris.coords.Coord.attributes` deleted and
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. Please note
that some special scalar coordinates which are expected to differ across
cubes (ancillary coordinates for derived coordinates like `p0` and `ptop`)
are removed as well.
:attr:`~iris.coords.DimCoord.circular` is set to ``False``. Differing
scalar coordinates can be ignored with the option
``ignore_scalar_coords``. Please note that some special scalar
coordinates which are expected to differ across cubes (ancillary
coordinates for derived coordinates like `p0` and `ptop`) are always
removed.

Notes
-----
Expand Down Expand Up @@ -701,6 +719,13 @@ def multi_model_statistics(products,
keep_input_datasets: bool
If True, the output will include the input datasets.
If False, only the computed statistics will be returned.
ignore_scalar_coords: bool
If True, remove any scalar coordinate in the input datasets before
merging the input cubes into the multi-dataset cube. The resulting
multi-dataset cube will have no scalar coordinates (the input datasets
will remain unchanged). If False, scalar coordinates will remain in the
input datasets, which might lead to merge conflicts in case the input
datasets have different scalar coordinates.

Returns
-------
Expand All @@ -719,6 +744,7 @@ def multi_model_statistics(products,
cubes=products,
statistics=statistics,
span=span,
ignore_scalar_coords=ignore_scalar_coords,
)
if all(type(p).__name__ == 'PreprocessorFile' for p in products):
# Avoid circular input: https://stackoverflow.com/q/16964467
Expand All @@ -732,7 +758,8 @@ def multi_model_statistics(products,
statistics=statistics,
output_products=sub_output_products,
span=span,
keep_input_datasets=keep_input_datasets
keep_input_datasets=keep_input_datasets,
ignore_scalar_coords=ignore_scalar_coords,
)

statistics_products |= group_statistics
Expand All @@ -746,7 +773,8 @@ def multi_model_statistics(products,


def ensemble_statistics(products, statistics,
output_products, span='overlap'):
output_products, span='overlap',
ignore_scalar_coords=False):
"""Entry point for ensemble statistics.

An ensemble grouping is performed on the input products.
Expand All @@ -770,6 +798,13 @@ def ensemble_statistics(products, statistics,
Overlap or full; if overlap, statistics are computed on common time-
span; if full, statistics are computed on full time spans, ignoring
missing data.
ignore_scalar_coords: bool
If True, remove any scalar coordinate in the input datasets before
merging the input cubes into the multi-dataset cube. The resulting
multi-dataset cube will have no scalar coordinates (the input datasets
will remain unchanged). If False, scalar coordinates will remain in the
input datasets, which might lead to merge conflicts in case the input
datasets have different scalar coordinates.

Returns
-------
Expand All @@ -788,5 +823,6 @@ def ensemble_statistics(products, statistics,
statistics=statistics,
output_products=output_products,
groupby=ensemble_grouping,
keep_input_datasets=False
keep_input_datasets=False,
ignore_scalar_coords=ignore_scalar_coords,
)
25 changes: 21 additions & 4 deletions tests/integration/recipe/test_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,18 +259,26 @@ def test_reference_for_bias_preproc_two_refs():
'statistics': ['wrong'],
'span': 'wrong',
'groupby': 'wrong',
'keep_input_datasets': 'wrong'
'keep_input_datasets': 'wrong',
'ignore_scalar_coords': 'wrong',
}


def test_invalid_multi_model_settings():
valid_keys = ['span', 'groupby', 'statistics', 'keep_input_datasets']
valid_keys = [
'groupby',
'ignore_scalar_coords',
'keep_input_datasets',
'span',
'statistics',
]
with pytest.raises(RecipeError) as rec_err:
check._verify_arguments(INVALID_MM_SETTINGS, valid_keys)
assert str(rec_err.value) == (
"Unexpected keyword argument encountered: wrong_parametre. "
"Valid keywords are: "
"['span', 'groupby', 'statistics', 'keep_input_datasets'].")
"['groupby', 'ignore_scalar_coords', 'keep_input_datasets', "
"'span', 'statistics'].")


def test_invalid_multi_model_statistics():
Expand Down Expand Up @@ -307,7 +315,16 @@ def test_invalid_multi_model_keep_input():
INVALID_MM_SETTINGS['keep_input_datasets'])
assert str(rec_err.value) == (
'Invalid value encountered for `keep_input_datasets`.'
'Must be defined as a boolean. Got wrong.')
'Must be defined as a boolean (true or false). Got wrong.')


def test_invalid_multi_model_ignore_scalar_coords():
    """A non-boolean ``ignore_scalar_coords`` must raise ``RecipeError``."""
    expected_msg = (
        'Invalid value encountered for `ignore_scalar_coords`.'
        'Must be defined as a boolean (true or false). Got wrong.')
    with pytest.raises(RecipeError) as exc_info:
        check._verify_ignore_scalar_coords(
            INVALID_MM_SETTINGS['ignore_scalar_coords'])
    assert str(exc_info.value) == expected_msg


def test_invalid_ensemble_statistics():
Expand Down
Loading