diff --git a/esmvalcore/preprocessor/_multimodel.py b/esmvalcore/preprocessor/_multimodel.py index fc25490119..01ce7f83ce 100644 --- a/esmvalcore/preprocessor/_multimodel.py +++ b/esmvalcore/preprocessor/_multimodel.py @@ -17,6 +17,7 @@ import iris import iris.coord_categorisation import numpy as np +from iris.coords import DimCoord from iris.cube import Cube, CubeList from iris.exceptions import MergeError from iris.util import equalise_attributes, new_axis @@ -242,13 +243,8 @@ def _equalise_cell_methods(cubes): cube.cell_methods = None -def _equalise_coordinates(cubes): - """Equalise coordinates in cubes (in-place).""" - if not cubes: - return - - # If metadata of a coordinate metadata is equal for all cubes, do not - # modify it; else remove long_name and attributes. +def _get_equal_coords_metadata(cubes): + """Get metadata for exactly matching coordinates across cubes.""" equal_coords_metadata = [] for coord in cubes[0].coords(): for other_cube in cubes[1:]: @@ -260,13 +256,103 @@ def _equalise_coordinates(cubes): break else: equal_coords_metadata.append(coord.metadata) + return equal_coords_metadata + + +def _get_equal_coord_names_metadata(cubes, equal_coords_metadata): + """Get metadata for coords with matching names and units across cubes. + + Note + ---- + Ignore coordinates whose names are not unique. 
+ + """ + equal_names_metadata = {} + for coord in cubes[0].coords(): + coord_name = coord.name() + + # Ignore exactly matching coordinates + if coord.metadata in equal_coords_metadata: + continue + + # Ignore coordinates that are not unique in original cube + if len(cubes[0].coords(coord_name)) > 1: + continue + + # Check if coordinate names and units match across all cubes + for other_cube in cubes[1:]: + + # Ignore names that do not exist in other cube/are not unique + if len(other_cube.coords(coord_name)) != 1: + break + + # Ignore names where units do not match across cubes + if coord.units != other_cube.coord(coord_name).units: + break - # Modify coordinates accordingly + # Coordinate name exists in all other cubes with identical units + # --> Get metadata that is identical across all cubes + else: + std_names = list( + {c.coord(coord_name).standard_name for c in cubes} + ) + long_names = list( + {c.coord(coord_name).long_name for c in cubes} + ) + var_names = list( + {c.coord(coord_name).var_name for c in cubes} + ) + equal_names_metadata[coord_name] = dict( + standard_name=std_names[0] if len(std_names) == 1 else None, + long_name=long_names[0] if len(long_names) == 1 else None, + var_name=var_names[0] if len(var_names) == 1 else None, + ) + + return equal_names_metadata + + +def _equalise_coordinate_metadata(cubes): + """Equalise coordinates in cubes (in-place).""" + if not cubes: + return + + # Filter out coordinates with exactly matching metadata across all cubes + # --> these will not be modified at all + equal_coords_metadata = _get_equal_coords_metadata(cubes) + + # Filter out coordinates with matching names and units + # --> keep matching names of these coordinates + # Note: ignores duplicate coordinates + equal_names_metadata = _get_equal_coord_names_metadata( + cubes, + equal_coords_metadata + ) + + # Modify all coordinates of all cubes accordingly for cube in cubes: for coord in cube.coords(): - if coord.metadata not in equal_coords_metadata: - 
coord.long_name = None - coord.attributes = None + + # Exactly matching coordinates --> do not modify + if coord.metadata in equal_coords_metadata: + continue + + # Non-exactly matching coordinates --> first, delete attributes and + # circular property + coord.attributes = {} + if isinstance(coord, DimCoord): + coord.circular = False + + # Matching names and units --> set common names + if coord.name() in equal_names_metadata: + equal_names = equal_names_metadata[coord.name()] + coord.standard_name = equal_names['standard_name'] + coord.long_name = equal_names['long_name'] + coord.var_name = equal_names['var_name'] + continue + + # Remaining coordinates --> remove long_name + # Note: remaining differences will raise an error at a later stage + coord.long_name = None # Additionally remove specific scalar coordinates which are not # expected to be equal in the input cubes @@ -293,7 +379,7 @@ def _combine(cubes): # merge_and_concat.html#common-issues-with-merge-and-concatenate equalise_attributes(cubes) _equalise_cell_methods(cubes) - _equalise_coordinates(cubes) + _equalise_coordinate_metadata(cubes) _equalise_fx_variables(cubes) for i, cube in enumerate(cubes): @@ -508,6 +594,30 @@ def multi_model_statistics(products, supported and can be specified like ``pXX.YY`` (for percentile ``XX.YY``; decimal part optional). + This function can handle cubes with differing metadata: + + - :attr:`~iris.cube.Cube.attributes`: Differing attributes are deleted, + see :func:`iris.util.equalise_attributes`. + - :attr:`~iris.cube.Cube.cell_methods`: All cell methods are deleted + prior to combining cubes. + - :meth:`~iris.cube.Cube.cell_measures`: All cell measures are deleted + prior to combining cubes, see + :func:`esmvalcore.preprocessor.remove_fx_variables`. + - :meth:`~iris.cube.Cube.ancillary_variables`: All ancillary variables + are deleted prior to combining cubes, see + :func:`esmvalcore.preprocessor.remove_fx_variables`. 
+ - :meth:`~iris.cube.Cube.coords`: Exactly identical coordinates are + preserved. For coordinates with equal :meth:`~iris.coords.Coord.name` and + :attr:`~iris.coords.Coord.units`, names are equalized, + :attr:`~iris.coords.Coord.attributes` deleted and + :attr:`~iris.coords.DimCoord.circular` is set to ``False``. For all other + coordinates, :attr:`~iris.coords.Coord.long_name` is removed, + :attr:`~iris.coords.Coord.attributes` deleted and + :attr:`~iris.coords.DimCoord.circular` is set to ``False``. Please note + that some special scalar coordinates which are expected to differ across + cubes (ancillary coordinates for derived coordinates like `p0` and `ptop`) + are removed as well. + Notes ----- Some of the operators in :py:mod:`iris.analysis` require additional diff --git a/tests/unit/preprocessor/_multimodel/test_multimodel.py b/tests/unit/preprocessor/_multimodel/test_multimodel.py index 29c2a60f9a..7824e2eacc 100644 --- a/tests/unit/preprocessor/_multimodel/test_multimodel.py +++ b/tests/unit/preprocessor/_multimodel/test_multimodel.py @@ -177,19 +177,27 @@ def get_cube_for_equal_coords_test(num_cubes): cube = generate_cube_from_dates('monthly') cubes.append(cube) - # Create cubes that have one equal coordinate ('year') and one non-equal - # coordinate ('x') + # Create cubes that have one exactly equal coordinate ('year'), one + # coordinate with matching names ('m') and one coordinate with non-matching + # names year_coord = AuxCoord([1, 2, 3], var_name='year', long_name='year', units='1', attributes={'test': 1}) + m_coord = AuxCoord([1, 2, 3], var_name='m', long_name='m', units='s', + attributes={'test': 0}) x_coord = AuxCoord([1, 2, 3], var_name='x', long_name='x', units='s', attributes={'test': 2}) for (idx, cube) in enumerate(cubes): + new_m_coord = m_coord.copy() + new_m_coord.var_name = f'm_{idx}' new_x_coord = x_coord.copy() new_x_coord.long_name = f'x_{idx}' cube.add_aux_coord(year_coord.copy(), 0) + cube.add_aux_coord(new_m_coord, 0) 
cube.add_aux_coord(new_x_coord, 0) assert cube.coord('year').metadata is not year_coord.metadata assert cube.coord('year').metadata == year_coord.metadata + assert cube.coord('m').metadata is not m_coord.metadata + assert cube.coord('m').metadata != m_coord.metadata assert cube.coord(f'x_{idx}').metadata is not x_coord.metadata assert cube.coord(f'x_{idx}').metadata != x_coord.metadata @@ -525,33 +533,46 @@ def test_combine_with_scalar_coords_to_remove(scalar_coord): assert merged_cube.shape == (5, 3) -def test_combine_preserve_equal_coordinates(): +def test_combine_equal_coordinates(): """Test ``_combine`` with equal input coordinates.""" cubes = get_cube_for_equal_coords_test(5) merged_cube = mm._combine(cubes) - # The equal coordinate ('year') was not changed; the non-equal one ('x') - # does not have a long_name and attributes anymore + # The equal coordinate ('year') was not changed assert merged_cube.coord('year').var_name == 'year' assert merged_cube.coord('year').standard_name is None assert merged_cube.coord('year').long_name == 'year' assert merged_cube.coord('year').attributes == {'test': 1} + + +def test_combine_non_equal_coordinates(): + """Test ``_combine`` with non-equal input coordinates.""" + cubes = get_cube_for_equal_coords_test(5) + merged_cube = mm._combine(cubes) + + # The var_name of the matching name coordinate ('m') has been removed, and + # the non-equal one ('x') does not have a long_name anymore + # Both coordinates lost their attributes + assert merged_cube.coord('m').var_name is None + assert merged_cube.coord('m').standard_name is None + assert merged_cube.coord('m').long_name == 'm' + assert merged_cube.coord('m').attributes == {} assert merged_cube.coord('x').var_name == 'x' assert merged_cube.coord('x').standard_name is None assert merged_cube.coord('x').long_name is None assert merged_cube.coord('x').attributes == {} -def test_equalise_coordinates_no_cubes(): - """Test that _equalise_coordinates doesn't fail with empty cubes.""" - 
mm._equalise_coordinates([]) +def test_equalise_coordinate_metadata_no_cubes(): + """Test _equalise_coordinate_metadata doesn't fail with empty cubes.""" + mm._equalise_coordinate_metadata([]) -def test_equalise_coordinates_one_cube(): - """Test that _equalise_coordinates doesn't fail with a single cubes.""" +def test_equalise_coordinate_metadata_one_cube(): + """Test _equalise_coordinate_metadata doesn't fail with a single cube.""" cube = generate_cube_from_dates('monthly') new_cube = cube.copy() - mm._equalise_coordinates([new_cube]) + mm._equalise_coordinate_metadata([new_cube]) assert new_cube is not cube assert new_cube == cube @@ -935,29 +956,6 @@ def test_map_to_new_time_int_coords(): assert np.issubdtype(out_cube.coord('decade').dtype, np.integer) -def test_preserve_equal_coordinates(): - """Test ``multi_model_statistics`` with equal input coordinates.""" - cubes = get_cube_for_equal_coords_test(5) - stat_cubes = multi_model_statistics(cubes, span='overlap', - statistics=['sum']) - - assert len(stat_cubes) == 1 - assert 'sum' in stat_cubes - stat_cube = stat_cubes['sum'] - assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0])) - - # The equal coordinate ('year') was not changed; the non-equal one ('x') - # does not have a long_name and attributes anymore - assert stat_cube.coord('year').var_name == 'year' - assert stat_cube.coord('year').standard_name is None - assert stat_cube.coord('year').long_name == 'year' - assert stat_cube.coord('year').attributes == {'test': 1} - assert stat_cube.coord('x').var_name == 'x' - assert stat_cube.coord('x').standard_name is None - assert stat_cube.coord('x').long_name is None - assert stat_cube.coord('x').attributes == {} - - def test_arbitrary_dims_5d(cubes_5d): """Test ``multi_model_statistics`` with 5D cubes.""" stat_cubes = multi_model_statistics( @@ -1034,6 +1032,176 @@ def test_arbitrary_dims_0d(cubes_with_arbitrary_dimensions): assert_array_allclose(stat_cube.data, np.ma.array(0.0)) +def 
test_preserve_equal_coordinates(): + """Test ``multi_model_statistics`` with equal input coordinates.""" + cubes = get_cube_for_equal_coords_test(5) + stat_cubes = multi_model_statistics(cubes, span='overlap', + statistics=['sum']) + + assert len(stat_cubes) == 1 + stat_cube = stat_cubes['sum'] + assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0])) + + # The equal coordinate 'year' was not changed + assert stat_cube.coord('year').var_name == 'year' + assert stat_cube.coord('year').standard_name is None + assert stat_cube.coord('year').long_name == 'year' + assert stat_cube.coord('year').attributes == {'test': 1} + + +def test_preserve_non_equal_coordinates(): + """Test ``multi_model_statistics`` with non-equal input coordinates.""" + cubes = get_cube_for_equal_coords_test(5) + + # Use "circular" attribute for one cube to check that it is set to "False" + # for each cube + cubes[2].coord('time').circular = True + + stat_cubes = multi_model_statistics(cubes, span='overlap', + statistics=['sum']) + + assert len(stat_cubes) == 1 + stat_cube = stat_cubes['sum'] + assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0])) + + # The attributes and circular property of the non-equal coordinate 'time' + # (due to differing circular) have been removed + assert stat_cube.coord('time').attributes == {} + assert stat_cube.coord('time').circular is False + + # The long_name and attributes of the non-equal coordinate 'x' have been + # removed + assert stat_cube.coord('x').var_name == 'x' + assert stat_cube.coord('x').standard_name is None + assert stat_cube.coord('x').long_name is None + assert stat_cube.coord('x').attributes == {} + + +@pytest.mark.parametrize( + 'equal_names', + [ + ['var_name'], + ['standard_name'], + ['long_name'], + ['var_name', 'standard_name'], + ['var_name', 'long_name'], + ['standard_name', 'long_name'], + ['var_name', 'standard_name', 'long_name'], + ] +) +def test_preserve_equal_name_coordinates(equal_names): + """Test 
``multi_model_statistics`` with equal-name coordinates.""" + all_names = ['var_name', 'standard_name', 'long_name'] + cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(5)) + + # Prepare names of coordinates of input cubes accordingly + for (idx, cube) in enumerate(cubes): + time_coord = cube.coord('time') + for name in all_names: + if name in equal_names or idx != 0: + setattr(time_coord, name, 'time') + else: # Different value for first cube if non-equal name + setattr(time_coord, name, None) + + # Use different coordinate attributes for each cube so the different + # coordinates are not exactly identical + time_coord.attributes = {'test': idx} + + stat_cubes = multi_model_statistics(cubes, span='overlap', + statistics=['sum']) + + assert len(stat_cubes) == 1 + stat_cube = stat_cubes['sum'] + assert_array_allclose(stat_cube.data, np.ma.array([5.0, 5.0, 5.0])) + + assert len(stat_cube.coords()) == 1 + time_coord = stat_cube.coords()[0] + + for name in all_names: + if name in equal_names: + assert getattr(time_coord, name) == 'time' + else: + assert getattr(time_coord, name) is None + assert time_coord.name() == 'time' + assert time_coord.units == 'days since 1850-01-01' + assert time_coord.attributes == {} + + +def test_ignore_equal_coordinates(): + """Test ``_get_equal_coord_names_metadata``.""" + cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(5)) + + equal_coords_metadata = [cubes[0].coord('time').metadata] + equal_names_metadata = mm._get_equal_coord_names_metadata( + cubes, + equal_coords_metadata, + ) + + # The equal_names_metadata dict should be empty since the exactly identical + # coordinate should be ignored + assert not equal_names_metadata + + +@pytest.mark.parametrize('cube_idx', [0, 1, 2, 3, 4]) +def test_ignore_duplicate_equal_name_coordinates(cube_idx): + """Test ``_get_equal_coord_names_metadata``.""" + cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(5)) + + # Add duplicate scalar coordinate 
+ d_coord_0 = AuxCoord( + 0.0, + var_name='d', + long_name='d', + units='m', + attributes={'test': 1} + ) + d_coord_1 = AuxCoord( + 1.0, + var_name='d', + long_name='d', + units='m', + ) + for cube in cubes: + cube.add_aux_coord(d_coord_0, ()) + cubes[cube_idx].add_aux_coord(d_coord_1, ()) + + equal_names_metadata = mm._get_equal_coord_names_metadata(cubes, []) + + # The equal_names_metadata dict should only contain the equal 'time' + # dimension, not the duplicate dimension + assert len(equal_names_metadata) == 1 + assert 'time' in equal_names_metadata + + +def test_ignore_non_existing_coordinates(): + """Test ``_get_equal_coord_names_metadata``.""" + cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(5)) + + # Add coordinate only for first cube + cubes[0].add_aux_coord(AuxCoord(0.0, long_name='x'), ()) + + equal_names_metadata = mm._get_equal_coord_names_metadata(cubes, []) + + # The equal_names_metadata dict should only contain the equal 'time' + # dimension, not the coordinate that only exists for the first cube + assert len(equal_names_metadata) == 1 + assert 'time' in equal_names_metadata + + +def test_ignore_coordinates_different_units(): + """Test ``_get_equal_coord_names_metadata``.""" + cubes = CubeList(generate_cube_from_dates('monthly') for _ in range(5)) + + # Adapt time units of one cube + cubes[3].coord('time').units = 'days since 1900-01-01' + + equal_names_metadata = mm._get_equal_coord_names_metadata(cubes, []) + + # The equal_names_metadata dict should be empty since the time units do not + # match + assert not equal_names_metadata + + def test_empty_input_multi_model_statistics(): """Check that ``multi_model_statistics`` fails with empty input.""" msg = "Cannot perform multicube statistics for an empty list of cubes"