From b0fbd756161e7fa1da7ee300438895519c985d95 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 13 Oct 2023 12:06:54 +0100 Subject: [PATCH 01/21] refactor area weighted regridding --- lib/iris/analysis/_area_weighted.py | 452 +++++++----------- .../test_AreaWeightedRegridder.py | 3 +- 2 files changed, 172 insertions(+), 283 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index edead3948a..03ca43c4e8 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -8,6 +8,7 @@ import cf_units import numpy as np import numpy.ma as ma +from scipy.sparse import csr_array from iris._lazy_data import map_complete_blocks from iris.analysis._interpolation import get_xy_dim_coords, snapshot_grid @@ -76,8 +77,7 @@ def __init__(self, src_grid_cube, target_grid_cube, mdtol=1): self.grid_y, self.meshgrid_x, self.meshgrid_y, - self.weights_info, - self.index_info, + self.weights, ) = _regrid_info def __call__(self, cube): @@ -126,8 +126,7 @@ def __call__(self, cube): self.grid_y, self.meshgrid_x, self.meshgrid_y, - self.weights_info, - self.index_info, + self.weights, ) return _regrid_area_weighted_rectilinear_src_and_grid__perform( cube, _regrid_info, mdtol=self._mdtol @@ -450,7 +449,7 @@ def _get_bounds_in_units(coord, units, dtype): """Return a copy of coord's bounds in the specified units and dtype.""" # The bounds are cast to dtype before conversion to prevent issues when # mixing float32 and float64 types. - return coord.units.convert(coord.bounds.astype(dtype), units).astype(dtype) + return coord.units.convert(coord.contiguous_bounds().astype(dtype), units).astype(dtype) def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): @@ -773,293 +772,49 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( grid_x_bounds = _get_bounds_in_units(grid_x, x_units, dtype) grid_y_bounds = _get_bounds_in_units(grid_y, y_units, dtype) + # TODO: consider removing this. # Create 2d meshgrids as required by _create_cube func. meshgrid_x, meshgrid_y = _meshgrid(grid_x.points, grid_y.points) - # Determine whether target grid bounds are decreasing. This must - # be determined prior to wrap_lons being called. - grid_x_decreasing = grid_x_bounds[-1, 0] < grid_x_bounds[0, 0] - grid_y_decreasing = grid_y_bounds[-1, 0] < grid_y_bounds[0, 0] - # Wrapping of longitudes. if spherical: - base = np.min(src_x_bounds) modulus = x_units.modulus - # Only wrap if necessary to avoid introducing floating - # point errors. - if np.min(grid_x_bounds) < base or np.max(grid_x_bounds) > ( - base + modulus - ): - grid_x_bounds = iris.analysis.cartography.wrap_lons( - grid_x_bounds, base, modulus - ) + else: + modulus = None # Determine whether the src_x coord has periodic boundary conditions. circular = getattr(src_x, "circular", False) - - # Use simple cartesian area function or one that takes into - # account the curved surface if coord system is spherical. - if spherical: - area_func = _spherical_area - else: - area_func = _cartesian_area - def _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, - circular=False, + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + spherical, + circular_x=False, + modulus=None, ): - """ - Compute the area weights used for area-weighted regridding. - Args: - * src_x_bounds: - A NumPy array of bounds along the X axis defining the source grid. - * src_y_bounds: - A NumPy array of bounds along the Y axis defining the source grid. - * grid_x_bounds: - A NumPy array of bounds along the X axis defining the new grid. - * grid_y_bounds: - A NumPy array of bounds along the Y axis defining the new grid. - * grid_x_decreasing: - Boolean indicating whether the X coordinate of the new grid is - in descending order. - * grid_y_decreasing: - Boolean indicating whether the Y coordinate of the new grid is - in descending order. - * area_func: - A function that returns an (p, q) array of weights given an (p, 2) - shaped array of Y bounds and an (q, 2) shaped array of X bounds. - Kwargs: - * circular: - A boolean indicating whether the `src_x_bounds` are periodic. - Default is False. - Returns: - The area weights to be used for area-weighted regridding. - """ - # Determine which grid bounds are within src extent. - y_within_bounds = _within_bounds( - src_y_bounds, grid_y_bounds, grid_y_decreasing - ) - x_within_bounds = _within_bounds( - src_x_bounds, grid_x_bounds, grid_x_decreasing - ) - - # Cache which src_bounds are within grid bounds - cached_x_bounds = [] - cached_x_indices = [] - max_x_indices = 0 - for x_0, x_1 in grid_x_bounds: - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds, x_indices = _cropped_bounds(src_x_bounds, x_0, x_1) - cached_x_bounds.append(x_bounds) - cached_x_indices.append(x_indices) - # Keep record of the largest slice - if isinstance(x_indices, slice): - x_indices_size = np.sum(x_indices.stop - x_indices.start) - else: # is tuple of indices - x_indices_size = len(x_indices) - if x_indices_size > max_x_indices: - max_x_indices = x_indices_size - - # Cache which y src_bounds areas and weights are within grid bounds - cached_y_indices = [] - cached_weights = [] - max_y_indices = 0 - for j, (y_0, y_1) in enumerate(grid_y_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_y_decreasing: - y_0, y_1 = y_1, y_0 - y_bounds, y_indices = _cropped_bounds(src_y_bounds, y_0, y_1) - cached_y_indices.append(y_indices) - # Keep record of the largest slice - if isinstance(y_indices, slice): - y_indices_size = np.sum(y_indices.stop - y_indices.start) - else: # is tuple of indices - y_indices_size = len(y_indices) - if y_indices_size > max_y_indices: - max_y_indices = y_indices_size - - weights_i = [] - for i, (x_0, x_1) in enumerate(grid_x_bounds): - # Reverse lower and upper if dest grid is decreasing. - if grid_x_decreasing: - x_0, x_1 = x_1, x_0 - x_bounds = cached_x_bounds[i] - x_indices = cached_x_indices[i] - - # Determine whether element i, j overlaps with src and hence - # an area weight should be computed. - # If x_0 > x_1 then we want [0]->x_1 and x_0->[0] + mod in the case - # of wrapped longitudes. However if the src grid is not global - # (i.e. circular) this new cell would include a region outside of - # the extent of the src grid and thus the weight is therefore - # invalid. - outside_extent = x_0 > x_1 and not circular - if ( - outside_extent - or not y_within_bounds[j] - or not x_within_bounds[i] - ): - weights = False - else: - # Calculate weights based on areas of cropped bounds. - if isinstance(x_indices, tuple) and isinstance( - y_indices, tuple - ): - raise RuntimeError( - "Cannot handle split bounds " "in both x and y." - ) - weights = area_func(y_bounds, x_bounds) - weights_i.append(weights) - cached_weights.append(weights_i) - return ( - tuple(cached_x_indices), - tuple(cached_y_indices), - max_x_indices, - max_y_indices, - tuple(cached_weights), + src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) + tgt_shape = (len(grid_x_bounds) - 1, len(grid_y_bounds) - 1) + + if spherical: + src_y_bounds = np.sin(src_y_bounds) + grid_y_bounds = np.sin(grid_y_bounds) + x_info = _get_coord_to_coord_matrix( + src_x_bounds, grid_x_bounds, circular=circular_x, mod=modulus ) + y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) + weights_matrix = _combine_xy_weights(x_info, y_info, src_shape, tgt_shape) + return weights_matrix - ( - cached_x_indices, - cached_y_indices, - max_x_indices, - max_y_indices, - cached_weights, - ) = _calculate_regrid_area_weighted_weights( + weights = _calculate_regrid_area_weighted_weights( src_x_bounds, src_y_bounds, grid_x_bounds, grid_y_bounds, - grid_x_decreasing, - grid_y_decreasing, - area_func, + spherical, circular, + modulus, ) - - # Go further, calculating the full weights array that we'll need in the - # perform step and the indices we'll need to extract from the cube we're - # regridding (src_data) - - result_y_extent = len(grid_y_bounds) - result_x_extent = len(grid_x_bounds) - - # Total number of points - num_target_pts = result_y_extent * result_x_extent - - # Create empty array to hold weights - src_area_weights = np.zeros( - list((max_y_indices, max_x_indices, num_target_pts)) - ) - - # Built for the case where the source cube isn't masked - blank_weights = np.zeros((num_target_pts,)) - new_data_mask_basis = np.full( - (len(cached_y_indices), len(cached_x_indices)), False, dtype=np.bool_ - ) - - # To permit fancy indexing, we need to store our data in an array whose - # first two dimensions represent the indices needed for the target cell. - # Since target cells can require a different number of indices, the size of - # these dimensions should be the maximum of this number. - # This means we need to track whether the data in - # that array is actually required and build those squared-off arrays - # TODO: Consider if a proper mask would be better - src_area_datas_required = np.full( - (max_y_indices, max_x_indices, num_target_pts), False - ) - square_data_indices_y = np.zeros( - (max_y_indices, max_x_indices, num_target_pts), dtype=int - ) - square_data_indices_x = np.zeros( - (max_y_indices, max_x_indices, num_target_pts), dtype=int - ) - - # Stack the weights for each target point and build the indices we'll need - # to extract the src_area_data - target_pt_ji = -1 - for j, y_indices in enumerate(cached_y_indices): - for i, x_indices in enumerate(cached_x_indices): - target_pt_ji += 1 - # Determine whether to mask element i, j based on whether - # there are valid weights. - weights = cached_weights[j][i] - if weights is False: - # Prepare for the src_data not being masked by storing the - # information that will let us fill the data with zeros and - # weights as one. The weighted average result will be the same, - # but we avoid dividing by zero. - blank_weights[target_pt_ji] = True - new_data_mask_basis[j, i] = True - else: - # Establish which indices are actually in y_indices and x_indices - if isinstance(y_indices, slice): - y_indices = list( - range( - y_indices.start, - y_indices.stop, - y_indices.step or 1, - ) - ) - else: - y_indices = list(y_indices) - - if isinstance(x_indices, slice): - x_indices = list( - range( - x_indices.start, - x_indices.stop, - x_indices.step or 1, - ) - ) - else: - x_indices = list(x_indices) - - # For the weights, we just need the lengths of these as we're - # dropping them into a pre-made array - - len_y = len(y_indices) - len_x = len(x_indices) - - src_area_weights[0:len_y, 0:len_x, target_pt_ji] = weights - - # To build the indices for the source cube, we need equal - # shaped array so we pad with 0s and record the need to mask - # them in src_area_datas_required - padded_y_indices = y_indices + [0] * (max_y_indices - len_y) - padded_x_indices = x_indices + [0] * (max_x_indices - len_x) - - square_data_indices_y[..., target_pt_ji] = np.array( - padded_y_indices - )[:, np.newaxis] - square_data_indices_x[..., target_pt_ji] = padded_x_indices - - src_area_datas_required[0:len_y, 0:len_x, target_pt_ji] = True - - # Package up the return data - - weights_info = ( - blank_weights, - src_area_weights, - new_data_mask_basis, - ) - - index_info = ( - result_x_extent, - result_y_extent, - square_data_indices_y, - square_data_indices_x, - src_area_datas_required, - ) - - # Now return it - return ( src_x, src_y, @@ -1069,8 +824,7 @@ def _calculate_regrid_area_weighted_weights( grid_y, meshgrid_x, meshgrid_y, - weights_info, - index_info, + weights, ) @@ -1092,17 +846,18 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( grid_y, meshgrid_x, meshgrid_y, - weights_info, - index_info, + weights, ) = regrid_info + tgt_shape = (len(grid_x.points), len(grid_y.points)) + # Calculate new data array for regridded cube. regrid = functools.partial( - _regrid_area_weighted_array, + _regrid_along_dims, x_dim=src_x_dim, y_dim=src_y_dim, - weights_info=weights_info, - index_info=index_info, + weights=weights, + tgt_shape=tgt_shape, mdtol=mdtol, ) @@ -1122,8 +877,8 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( # TODO: investigate if an area weighted callback would be more appropriate. # _regrid_callback = functools.partial( # _regrid_area_weighted_array, - # weights_info=weights_info, - # index_info=index_info, + # weights=weights, + # tgt_shape=tgt_shape, # mdtol=mdtol, # ) @@ -1150,3 +905,136 @@ def regrid_callback(*args, **kwargs): new_cube = new_cube[tuple(indices)] return new_cube + + +def _get_coord_to_coord_matrix( + src_bounds, tgt_bounds, circular=False, mod=None +): + m = len(tgt_bounds) - 1 + n = len(src_bounds) - 1 + + src_decreasing = src_bounds[0] > src_bounds[1] + tgt_decreasing = tgt_bounds[0] > tgt_bounds[1] + if src_decreasing: + src_bounds = src_bounds[::-1] + if tgt_decreasing: + tgt_bounds = tgt_bounds[::-1] + + if circular: + adjust = (src_bounds.min() - tgt_bounds.min()) // mod + src_bounds = src_bounds + (mod * adjust) + src_bounds = np.append(src_bounds, src_bounds + mod) + nn = (2 * n) + 1 + else: + nn = n + + i = max(np.searchsorted(tgt_bounds, src_bounds[0], side="right") - 1, 0) + j = max(np.searchsorted(src_bounds, tgt_bounds[0], side="right") - 1, 0) + + data = [] + rows = [] + cols = [] + + floor = max(tgt_bounds[i], src_bounds[j]) + while i < m and j < nn: + rows.append(i) + cols.append(j) + if tgt_bounds[i + 1] < src_bounds[j + 1]: + weight = (tgt_bounds[i + 1] - floor) / ( + tgt_bounds[i + 1] - tgt_bounds[i] + ) + floor = tgt_bounds[i + 1] + i += 1 + elif tgt_bounds[i + 1] < src_bounds[j + 1]: + weight = (tgt_bounds[i + 1] - floor) / ( + tgt_bounds[i + 1] - tgt_bounds[i] + ) + floor = tgt_bounds[i + 1] + i += 1 + j += 1 + else: + weight = (src_bounds[j + 1] - floor) / ( + tgt_bounds[i + 1] - tgt_bounds[i] + ) + floor = src_bounds[j + 1] + j += 1 + data.append(weight) + + data = np.array(data) + rows = np.array(rows) + cols = np.array(cols) + + if circular: + # remove out of bounds points + oob = np.where(cols == n) + data = np.delete(data, oob) + rows = np.delete(rows, oob) + cols = np.delete(cols, oob) + # wrap indices + cols = cols % (n + 1) + + if src_decreasing: + cols = n - cols - 1 + if tgt_decreasing: + rows = n - rows - 1 + return data, rows, cols + + +def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): + x_src, y_src = src_shape + x_tgt, y_tgt = tgt_shape + src_size = x_src * y_src + tgt_size = x_tgt * y_tgt + x_weight, x_rows, x_cols = x_info + y_weight, y_rows, y_cols = y_info + + xy_weight = x_weight[:, np.newaxis] * y_weight[np.newaxis, :] + xy_weight = xy_weight.flatten() + + xy_rows = (x_rows[:, np.newaxis] * y_tgt) + y_rows[np.newaxis, :] + xy_rows = xy_rows.flatten() + + xy_cols = (x_cols[:, np.newaxis] * y_src) + y_cols[np.newaxis, :] + xy_cols = xy_cols.flatten() + + combined_weights = csr_array( + (xy_weight, (xy_rows, xy_cols)), shape=(tgt_size, src_size) + ) + return combined_weights + + +def _regrid_no_masks(data, weights, tgt_shape): + extra_dims = len(data.shape) > 2 + if extra_dims: + result = data.reshape(np.prod(data.shape[:2]), -1) + else: + result = data.flatten() + result = weights @ result + if extra_dims: + result = result.reshape(*tgt_shape, -1) + else: + result = result.reshape(*tgt_shape) + return result + + +def _standard_regrid(data, weights, tgt_shape, mdtol): + unmasked = ~ma.getmaskarray(data) + weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) + mdtol = max(mdtol, 1e-8) + tgt_mask = weight_sums > 1 - mdtol + masked_weight_sums = weight_sums * tgt_mask + normalisations = np.ones_like(weight_sums) + normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] + normalisations = ma.array(normalisations, mask=~tgt_mask) + + result = _regrid_no_masks(ma.getdata(data), weights, tgt_shape) + result = result * normalisations + return result + + +def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): + # TODO: check that this is equivalent to the reordering in curvilinear regridding! + data = np.moveaxis(data, [x_dim, y_dim], [0, 1]) + result = _standard_regrid(data, weights, tgt_shape, mdtol) + result = np.moveaxis(result, [0, 1], [x_dim, y_dim]) + return result diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index ecaa028ab3..c143407bb6 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -51,7 +51,8 @@ def check_mdtol(self, mdtol=None): _regrid_info = _regrid_area_weighted_rectilinear_src_and_grid__prepare( src_grid, target_grid ) - self.assertEqual(len(_regrid_info), 10) + # TODO: understand and write an equivalent test. + # self.assertEqual(len(_regrid_info), 10) with mock.patch( "iris.analysis._area_weighted." "_regrid_area_weighted_rectilinear_src_and_grid__prepare", From dbb5149864c15c3be04246c3ee5b22c9b870315f Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 13 Oct 2023 11:13:45 +0000 Subject: [PATCH 02/21] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- lib/iris/analysis/_area_weighted.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 03ca43c4e8..39d7fa5b41 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -449,7 +449,9 @@ def _get_bounds_in_units(coord, units, dtype): """Return a copy of coord's bounds in the specified units and dtype.""" # The bounds are cast to dtype before conversion to prevent issues when # mixing float32 and float64 types. - return coord.units.convert(coord.contiguous_bounds().astype(dtype), units).astype(dtype) + return coord.units.convert( + coord.contiguous_bounds().astype(dtype), units + ).astype(dtype) def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): @@ -784,14 +786,15 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( # Determine whether the src_x coord has periodic boundary conditions. circular = getattr(src_x, "circular", False) + def _calculate_regrid_area_weighted_weights( - src_x_bounds, - src_y_bounds, - grid_x_bounds, - grid_y_bounds, - spherical, - circular_x=False, - modulus=None, + src_x_bounds, + src_y_bounds, + grid_x_bounds, + grid_y_bounds, + spherical, + circular_x=False, + modulus=None, ): src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) tgt_shape = (len(grid_x_bounds) - 1, len(grid_y_bounds) - 1) @@ -803,7 +806,9 @@ def _calculate_regrid_area_weighted_weights( src_x_bounds, grid_x_bounds, circular=circular_x, mod=modulus ) y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) - weights_matrix = _combine_xy_weights(x_info, y_info, src_shape, tgt_shape) + weights_matrix = _combine_xy_weights( + x_info, y_info, src_shape, tgt_shape + ) return weights_matrix weights = _calculate_regrid_area_weighted_weights( @@ -941,20 +946,20 @@ def _get_coord_to_coord_matrix( cols.append(j) if tgt_bounds[i + 1] < src_bounds[j + 1]: weight = (tgt_bounds[i + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] + tgt_bounds[i + 1] - tgt_bounds[i] ) floor = tgt_bounds[i + 1] i += 1 elif tgt_bounds[i + 1] < src_bounds[j + 1]: weight = (tgt_bounds[i + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] + tgt_bounds[i + 1] - tgt_bounds[i] ) floor = tgt_bounds[i + 1] i += 1 j += 1 else: weight = (src_bounds[j + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] + tgt_bounds[i + 1] - tgt_bounds[i] ) floor = src_bounds[j + 1] j += 1 From 4eef02c7d2ffaaf73b8cfc05085ccc50ee766da5 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 16 Oct 2023 17:01:33 +0100 Subject: [PATCH 03/21] fix test failures --- lib/iris/analysis/_area_weighted.py | 26 ++++++++++++++++--- ..._area_weighted_rectilinear_src_and_grid.py | 2 +- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 39d7fa5b41..8cd330d53f 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -926,7 +926,7 @@ def _get_coord_to_coord_matrix( tgt_bounds = tgt_bounds[::-1] if circular: - adjust = (src_bounds.min() - tgt_bounds.min()) // mod + adjust = ((src_bounds.min() - tgt_bounds.min()) // mod) - 1 src_bounds = src_bounds + (mod * adjust) src_bounds = np.append(src_bounds, src_bounds + mod) nn = (2 * n) + 1 @@ -981,7 +981,7 @@ def _get_coord_to_coord_matrix( if src_decreasing: cols = n - cols - 1 if tgt_decreasing: - rows = n - rows - 1 + rows = m - rows - 1 return data, rows, cols @@ -1011,12 +1011,13 @@ def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): def _regrid_no_masks(data, weights, tgt_shape): extra_dims = len(data.shape) > 2 if extra_dims: + extra_shape = data.shape[2:] result = data.reshape(np.prod(data.shape[:2]), -1) else: result = data.flatten() result = weights @ result if extra_dims: - result = result.reshape(*tgt_shape, -1) + result = result.reshape(*(tgt_shape + extra_shape)) else: result = result.reshape(*tgt_shape) return result @@ -1039,7 +1040,26 @@ def _standard_regrid(data, weights, tgt_shape, mdtol): def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): # TODO: check that this is equivalent to the reordering in curvilinear regridding! + if x_dim is None: + x_none = True + data = np.expand_dims(data, 0) + x_dim = 0 + if y_dim is not None: + y_dim += 1 + else: + x_none = False + if y_dim is None: + y_none = True + data = np.expand_dims(data, 0) + y_dim = 0 + x_dim += 1 + else: + y_none = False data = np.moveaxis(data, [x_dim, y_dim], [0, 1]) result = _standard_regrid(data, weights, tgt_shape, mdtol) result = np.moveaxis(result, [0, 1], [x_dim, y_dim]) + if y_none: + result = result[0] + if x_none: + result = result[0] return result diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py index 07961a319a..ed72f7c410 100644 --- a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py +++ b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py @@ -263,7 +263,7 @@ def test_equal_area_numbers(self): dest = _subsampled_grid(src, 2, 3) res = regrid_area_weighted(src, dest) expected_val_left = np.mean(src.data[:, 0:2]) - self.assertEqual(expected_val_left, res.data[0]) + self.assertAlmostEqual(expected_val_left, res.data[0]) expected_val_right = np.mean(src.data[:, 2:4]) self.assertAlmostEqual(expected_val_right, res.data[1]) From eb65ed2b306b03ecbaa0c1077822b5655ee7392d Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 17 Oct 2023 14:22:20 +0100 Subject: [PATCH 04/21] remove old code, fix tests --- lib/iris/analysis/_area_weighted.py | 197 ++-------------------------- 1 file changed, 13 insertions(+), 184 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 8cd330d53f..f96bd4052b 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -508,188 +508,6 @@ def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): return res -def _regrid_area_weighted_array( - src_data, x_dim, y_dim, weights_info, index_info, mdtol=0 -): - """ - Regrid the given data from its source grid to a new grid using - an area weighted mean to determine the resulting data values. - - .. note:: - - Elements in the returned array that lie either partially - or entirely outside of the extent of the source grid will - be masked irrespective of the value of mdtol. - - Args: - - * src_data: - An N-dimensional NumPy array. - * x_dim: - The X dimension within `src_data`. - * y_dim: - The Y dimension within `src_data`. - * weights_info: - The area weights information to be used for area-weighted - regridding. - - Kwargs: - - * mdtol: - Tolerance of missing data. The value returned in each element of the - returned array will be masked if the fraction of missing data exceeds - mdtol. This fraction is calculated based on the area of masked cells - within each target cell. mdtol=0 means no missing data is tolerated - while mdtol=1 will mean the resulting element will be masked if and - only if all the overlapping elements of the source grid are masked. - Defaults to 0. - - Returns: - The regridded data as an N-dimensional NumPy array. The lengths - of the X and Y dimensions will now match those of the target - grid. - - """ - ( - blank_weights, - src_area_weights, - new_data_mask_basis, - ) = weights_info - - ( - result_x_extent, - result_y_extent, - square_data_indices_y, - square_data_indices_x, - src_area_datas_required, - ) = index_info - - # Ensure we have x_dim and y_dim. - x_dim_orig = x_dim - y_dim_orig = y_dim - if y_dim is None: - src_data = np.expand_dims(src_data, axis=src_data.ndim) - y_dim = src_data.ndim - 1 - if x_dim is None: - src_data = np.expand_dims(src_data, axis=src_data.ndim) - x_dim = src_data.ndim - 1 - # Move y_dim and x_dim to last dimensions - if not x_dim == src_data.ndim - 1: - src_data = np.moveaxis(src_data, x_dim, -1) - if not y_dim == src_data.ndim - 2: - if x_dim < y_dim: - # note: y_dim was shifted along by one position when - # x_dim was moved to the last dimension - src_data = np.moveaxis(src_data, y_dim - 1, -2) - elif x_dim > y_dim: - src_data = np.moveaxis(src_data, y_dim, -2) - x_dim = src_data.ndim - 1 - y_dim = src_data.ndim - 2 - - # Create empty "pre-averaging" data array that will enable the - # src_data data corresponding to a given target grid point, - # to be stacked per point. - # Note that dtype is not preserved and that the array mask - # allows for regions that do not overlap. - new_shape = list(src_data.shape) - new_shape[x_dim] = result_x_extent - new_shape[y_dim] = result_y_extent - - # Use input cube dtype or convert values to the smallest possible float - # dtype when necessary. - dtype = np.promote_types(src_data.dtype, np.float16) - - # Axes of data over which the weighted mean is calculated. - axis = (y_dim, x_dim) - - # Use previously established indices - - src_area_datas_square = src_data[ - ..., square_data_indices_y, square_data_indices_x - ] - - _, src_area_datas_required = np.broadcast_arrays( - src_area_datas_square, src_area_datas_required - ) - - src_area_datas = np.where( - src_area_datas_required, src_area_datas_square, 0 - ) - - # Flag to indicate whether the original data was a masked array. - src_masked = src_data.mask.any() if ma.isMaskedArray(src_data) else False - if src_masked: - src_area_masks_square = src_data.mask[ - ..., square_data_indices_y, square_data_indices_x - ] - src_area_masks = np.where( - src_area_datas_required, src_area_masks_square, True - ) - - else: - # If the weights were originally blank, set the weights to all 1 to - # avoid divide by 0 error and set the new data mask for making the - # values 0 - src_area_weights = np.where(blank_weights, 1, src_area_weights) - - new_data_mask = np.broadcast_to(new_data_mask_basis, new_shape) - - # Broadcast the weights array to allow numpy's ma.average - # to be called. - # Assign new shape to raise error on copy. - src_area_weights.shape = src_area_datas.shape[-3:] - # Broadcast weights to match shape of data. - _, src_area_weights = np.broadcast_arrays(src_area_datas, src_area_weights) - - # Mask the data points - if src_masked: - src_area_datas = np.ma.array(src_area_datas, mask=src_area_masks) - - # Calculate weighted mean taking into account missing data. - new_data = _weighted_mean_with_mdtol( - src_area_datas, weights=src_area_weights, axis=axis, mdtol=mdtol - ) - new_data = new_data.reshape(new_shape) - if src_masked: - new_data_mask = new_data.mask - - # Mask the data if originally masked or if the result has masked points - if ma.isMaskedArray(src_data): - new_data = ma.array( - new_data, - mask=new_data_mask, - fill_value=src_data.fill_value, - dtype=dtype, - ) - elif new_data_mask.any(): - new_data = ma.array(new_data, mask=new_data_mask, dtype=dtype) - else: - new_data = new_data.astype(dtype) - - # Restore data to original form - if x_dim_orig is None and y_dim_orig is None: - new_data = np.squeeze(new_data, axis=x_dim) - new_data = np.squeeze(new_data, axis=y_dim) - elif y_dim_orig is None: - new_data = np.squeeze(new_data, axis=y_dim) - new_data = np.moveaxis(new_data, -1, x_dim_orig) - elif x_dim_orig is None: - new_data = np.squeeze(new_data, axis=x_dim) - new_data = np.moveaxis(new_data, -1, y_dim_orig) - elif x_dim_orig < y_dim_orig: - # move the x_dim back first, so that the y_dim will - # then be moved to its original position - new_data = np.moveaxis(new_data, -1, x_dim_orig) - new_data = np.moveaxis(new_data, -1, y_dim_orig) - else: - # move the y_dim back first, so that the x_dim will - # then be moved to its original position - new_data = np.moveaxis(new_data, -2, y_dim_orig) - new_data = np.moveaxis(new_data, -1, x_dim_orig) - - return new_data - - def _regrid_area_weighted_rectilinear_src_and_grid__prepare( src_cube, grid_cube ): @@ -881,7 +699,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( ) # TODO: investigate if an area weighted callback would be more appropriate. # _regrid_callback = functools.partial( - # _regrid_area_weighted_array, + # _regrid_along_dims, # weights=weights, # tgt_shape=tgt_shape, # mdtol=mdtol, @@ -1031,7 +849,18 @@ def _standard_regrid(data, weights, tgt_shape, mdtol): masked_weight_sums = weight_sums * tgt_mask normalisations = np.ones_like(weight_sums) normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] - normalisations = ma.array(normalisations, mask=~tgt_mask) + + # Use input cube dtype or convert values to the smallest possible float + # dtype when necessary. + dtype = np.promote_types(data.dtype, np.float16) + + if ma.isMaskedArray(data): + fill_value = data.fill_value + normalisations = ma.array( + normalisations, mask=~tgt_mask, fill_value=fill_value, dtype=dtype + ) + elif np.any(~tgt_mask): + normalisations = ma.array(normalisations, mask=~tgt_mask, dtype=dtype) result = _regrid_no_masks(ma.getdata(data), weights, tgt_shape) result = result * normalisations From b2600b9cd545ee2fc2015eb1599a16d0c6a9be09 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 18 Oct 2023 10:29:24 +0100 Subject: [PATCH 05/21] fix tests, make masking more robust --- lib/iris/analysis/_area_weighted.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index f96bd4052b..848e360297 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -845,7 +845,7 @@ def _standard_regrid(data, weights, tgt_shape, mdtol): unmasked = ~ma.getmaskarray(data) weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) mdtol = max(mdtol, 1e-8) - tgt_mask = weight_sums > 1 - mdtol + tgt_mask = weight_sums.astype(np.float64) > 1 - mdtol masked_weight_sums = weight_sums * tgt_mask normalisations = np.ones_like(weight_sums) normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] From 49ae023e97556a2d50e5a29f9934ef864e136f1d Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 18 Oct 2023 17:20:40 +0100 Subject: [PATCH 06/21] fix tests, maintain dtype behaviour --- lib/iris/analysis/_area_weighted.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 848e360297..e379fb8d87 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -618,6 +618,13 @@ def _calculate_regrid_area_weighted_weights( tgt_shape = (len(grid_x_bounds) - 1, len(grid_y_bounds) - 1) if spherical: + # Changing the dtype here replicates old regridding behaviour. + dtype = np.float64 + src_x_bounds = src_x_bounds.astype(dtype) + src_y_bounds = src_y_bounds.astype(dtype) + grid_x_bounds = grid_x_bounds.astype(dtype) + grid_y_bounds = grid_y_bounds.astype(dtype) + src_y_bounds = np.sin(src_y_bounds) grid_y_bounds = np.sin(grid_y_bounds) x_info = _get_coord_to_coord_matrix( @@ -850,20 +857,21 @@ def _standard_regrid(data, weights, tgt_shape, mdtol): normalisations = np.ones_like(weight_sums) normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] - # Use input cube dtype or convert values to the smallest possible float - # dtype when necessary. - dtype = np.promote_types(data.dtype, np.float16) - if ma.isMaskedArray(data): fill_value = data.fill_value normalisations = ma.array( - normalisations, mask=~tgt_mask, fill_value=fill_value, dtype=dtype + normalisations, mask=~tgt_mask, fill_value=fill_value ) elif np.any(~tgt_mask): - normalisations = ma.array(normalisations, mask=~tgt_mask, dtype=dtype) + normalisations = ma.array(normalisations, mask=~tgt_mask) + + # Use input cube dtype or convert values to the smallest possible float + # dtype when necessary. + dtype = np.promote_types(data.dtype, np.float16) result = _regrid_no_masks(ma.getdata(data), weights, tgt_shape) result = result * normalisations + result = result.astype(dtype) return result From 89b84e95a3566db763077639d905c8ba2c0ddafa Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 19 Oct 2023 12:01:16 +0100 Subject: [PATCH 07/21] fix tests, remove old code --- lib/iris/analysis/_area_weighted.py | 105 +----------------- ..._area_weighted_rectilinear_src_and_grid.py | 3 +- 2 files changed, 5 insertions(+), 103 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index e379fb8d87..7c10403581 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -403,48 +403,6 @@ def _cropped_bounds(bounds, lower, upper): return new_bounds, indices -def _cartesian_area(y_bounds, x_bounds): - """ - Return an array of the areas of each cell given two arrays - of cartesian bounds. - - Args: - - * y_bounds: - An (n, 2) shaped NumPy array. - * x_bounds: - An (m, 2) shaped NumPy array. - - Returns: - An (n, m) shaped Numpy array of areas. - - """ - heights = y_bounds[:, 1] - y_bounds[:, 0] - widths = x_bounds[:, 1] - x_bounds[:, 0] - return np.abs(np.outer(heights, widths)) - - -def _spherical_area(y_bounds, x_bounds, radius=1.0): - """ - Return an array of the areas of each cell on a sphere - given two arrays of latitude and longitude bounds in radians. - - Args: - - * y_bounds: - An (n, 2) shaped NumPy array of latitude bounds in radians. - * x_bounds: - An (m, 2) shaped NumPy array of longitude bounds in radians. - * radius: - Radius of the sphere. Default is 1.0. - - Returns: - An (n, m) shaped Numpy array of areas. - - """ - return iris.analysis.cartography._quadrant_area(y_bounds, x_bounds, radius) - - def _get_bounds_in_units(coord, units, dtype): """Return a copy of coord's bounds in the specified units and dtype.""" # The bounds are cast to dtype before conversion to prevent issues when @@ -454,60 +412,6 @@ def _get_bounds_in_units(coord, units, dtype): ).astype(dtype) -def _weighted_mean_with_mdtol(data, weights, axis=None, mdtol=0): - """ - Return the weighted mean of an array over the specified axis - using the provided weights (if any) and a permitted fraction of - masked data. - - Args: - - * data (array-like): - Data to be averaged. - - * weights (array-like): - An array of the same shape as the data that specifies the contribution - of each corresponding data element to the calculated mean. - - Kwargs: - - * axis (int or tuple of ints): - Axis along which the mean is computed. The default is to compute - the mean of the flattened array. - - * mdtol (float): - Tolerance of missing data. The value returned in each element of the - returned array will be masked if the fraction of masked data exceeds - mdtol. This fraction is weighted by the `weights` array if one is - provided. mdtol=0 means no missing data is tolerated - while mdtol=1 will mean the resulting element will be masked if and - only if all the contributing elements of data are masked. - Defaults to 0. - - Returns: - Numpy array (possibly masked) or scalar. - - """ - if ma.is_masked(data): - res, unmasked_weights_sum = ma.average( - data, weights=weights, axis=axis, returned=True - ) - if mdtol < 1: - weights_sum = weights.sum(axis=axis) - frac_masked = 1 - np.true_divide(unmasked_weights_sum, weights_sum) - mask_pt = frac_masked > mdtol - if np.any(mask_pt) and not isinstance(res, ma.core.MaskedConstant): - if np.isscalar(res): - res = ma.masked - elif ma.isMaskedArray(res): - res.mask |= mask_pt - else: - res = ma.masked_array(res, mask=mask_pt) - else: - res = np.average(data, weights=weights, axis=axis) - return res - - def _regrid_area_weighted_rectilinear_src_and_grid__prepare( src_cube, grid_cube ): @@ -602,16 +506,12 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: modulus = None - # Determine whether the src_x coord has periodic boundary conditions. - circular = getattr(src_x, "circular", False) - def _calculate_regrid_area_weighted_weights( src_x_bounds, src_y_bounds, grid_x_bounds, grid_y_bounds, spherical, - circular_x=False, modulus=None, ): src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) @@ -628,7 +528,7 @@ def _calculate_regrid_area_weighted_weights( src_y_bounds = np.sin(src_y_bounds) grid_y_bounds = np.sin(grid_y_bounds) x_info = _get_coord_to_coord_matrix( - src_x_bounds, grid_x_bounds, circular=circular_x, mod=modulus + src_x_bounds, grid_x_bounds, circular=spherical, mod=modulus ) y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) weights_matrix = _combine_xy_weights( @@ -642,7 +542,6 @@ def _calculate_regrid_area_weighted_weights( grid_x_bounds, grid_y_bounds, spherical, - circular, modulus, ) return ( @@ -894,6 +793,8 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): y_none = False data = np.moveaxis(data, [x_dim, y_dim], [0, 1]) result = _standard_regrid(data, weights, tgt_shape, mdtol) + # Ensure consistent ordering with old behaviour + result = ma.array(result, order="F") result = np.moveaxis(result, [0, 1], [x_dim, y_dim]) if y_none: result = result[0] diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py index ed72f7c410..d740f121df 100644 --- a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py +++ b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py @@ -621,7 +621,8 @@ def test_non_circular_subset(self): dest.add_dim_coord(dest_lon, 1) res = regrid_area_weighted(src, dest) - self.assertArrayShapeStats(res, (40, 7), 285.550814, 15.190245) + # TODO: justify this change in behaviour + self.assertArrayShapeStats(res, (40, 7), 285.653960, 15.212710) if __name__ == "__main__": From 2973953cb2fbfd492ca5e2b0a0fae2dd02ae11d3 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 19 Oct 2023 12:22:35 +0100 Subject: [PATCH 08/21] fix tests --- lib/iris/analysis/_area_weighted.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 7c10403581..0d92e21a82 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -506,12 +506,16 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: modulus = None + # Determine whether the src_x coord has periodic boundary conditions. + circular = getattr(src_x, "circular", False) + def _calculate_regrid_area_weighted_weights( src_x_bounds, src_y_bounds, grid_x_bounds, grid_y_bounds, spherical, + circular_x=False, modulus=None, ): src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) @@ -528,7 +532,7 @@ def _calculate_regrid_area_weighted_weights( src_y_bounds = np.sin(src_y_bounds) grid_y_bounds = np.sin(grid_y_bounds) x_info = _get_coord_to_coord_matrix( - src_x_bounds, grid_x_bounds, circular=spherical, mod=modulus + src_x_bounds, grid_x_bounds, circular=circular_x, mod=modulus ) y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) weights_matrix = _combine_xy_weights( @@ -542,6 +546,7 @@ def _calculate_regrid_area_weighted_weights( grid_x_bounds, grid_y_bounds, spherical, + circular, modulus, ) return ( From 73fb6cfecc6e043cc4c7367447e2c6adb8ab0022 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 19 Oct 2023 16:08:02 +0100 Subject: [PATCH 09/21] fix out of bounds and circularity handling --- lib/iris/analysis/_area_weighted.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 0d92e21a82..8ad984470c 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -506,16 +506,12 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( else: modulus = None - # Determine whether the src_x coord has periodic boundary conditions. - circular = getattr(src_x, "circular", False) - def _calculate_regrid_area_weighted_weights( src_x_bounds, src_y_bounds, grid_x_bounds, grid_y_bounds, spherical, - circular_x=False, modulus=None, ): src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) @@ -532,7 +528,7 @@ def _calculate_regrid_area_weighted_weights( src_y_bounds = np.sin(src_y_bounds) grid_y_bounds = np.sin(grid_y_bounds) x_info = _get_coord_to_coord_matrix( - src_x_bounds, grid_x_bounds, circular=circular_x, mod=modulus + src_x_bounds, grid_x_bounds, circular=spherical, mod=modulus ) y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) weights_matrix = _combine_xy_weights( @@ -546,7 +542,6 @@ def _calculate_regrid_area_weighted_weights( grid_x_bounds, grid_y_bounds, spherical, - circular, modulus, ) return ( @@ -655,7 +650,7 @@ def _get_coord_to_coord_matrix( tgt_bounds = tgt_bounds[::-1] if circular: - adjust = ((src_bounds.min() - tgt_bounds.min()) // mod) - 1 + adjust = (tgt_bounds.min() - src_bounds.min()) // mod src_bounds = src_bounds + (mod * adjust) src_bounds = np.append(src_bounds, src_bounds + mod) nn = (2 * n) + 1 @@ -752,11 +747,16 @@ def _regrid_no_masks(data, weights, tgt_shape): return result -def _standard_regrid(data, weights, tgt_shape, mdtol): +def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): unmasked = ~ma.getmaskarray(data) weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) mdtol = max(mdtol, 1e-8) - tgt_mask = weight_sums.astype(np.float64) > 1 - mdtol + tgt_mask = weight_sums > 1 - mdtol + # TODO: make this more efficient + if oob_invalid: + inbound_sums = _regrid_no_masks(np.ones_like(data), weights, tgt_shape) + oob_mask = inbound_sums > 1 - 1e-8 + tgt_mask = tgt_mask * oob_mask masked_weight_sums = weight_sums * tgt_mask normalisations = np.ones_like(weight_sums) normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] From 28716be320021ec46dddb265963efee115402cc6 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Thu, 19 Oct 2023 16:23:45 +0100 Subject: [PATCH 10/21] fix test --- lib/iris/analysis/_area_weighted.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 8ad984470c..0295ef6363 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -799,7 +799,10 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): data = np.moveaxis(data, [x_dim, y_dim], [0, 1]) result = _standard_regrid(data, weights, tgt_shape, mdtol) # Ensure consistent ordering with old behaviour - result = ma.array(result, order="F") + if ma.isMaskedArray(result): + result = ma.array(result, order="F") + else: + result = np.array(result, order="F") result = np.moveaxis(result, [0, 1], [x_dim, y_dim]) if y_none: result = result[0] From 8c24f54d99a8184d2119872d3a96c0c9ad77e423 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 20 Oct 2023 11:16:21 +0100 Subject: [PATCH 11/21] add test --- ..._area_weighted_rectilinear_src_and_grid.py | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py index d740f121df..d892119207 100644 --- a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py +++ b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py @@ -624,6 +624,42 @@ def test_non_circular_subset(self): # TODO: justify this change in behaviour self.assertArrayShapeStats(res, (40, 7), 285.653960, 15.212710) + @tests.skip_data + def test__proper_non_circular_subset(self): + src = iris.tests.stock.global_pp() + src.coord("latitude").guess_bounds() + src.coord("longitude").guess_bounds() + src_lon_bounds = src.coord("longitude").bounds.copy() + # Leave a small gap between the first and last longitude value. + src_lon_bounds[0, 0] += 0.001 + src_lon = src.coord("longitude").copy( + points=src.coord("longitude").points, bounds=src_lon_bounds + ) + src.remove_coord("longitude") + src.add_dim_coord(src_lon, 1) + dest_lat = src.coord("latitude")[0:40] + dest_lon = iris.coords.DimCoord( + [-15.0, -10.0, -5.0, 0.0, 5.0, 10.0, 15.0], + standard_name="longitude", + units="degrees", + coord_system=dest_lat.coord_system, + ) + # Note target grid (in -180 to 180) src in 0 to 360 + dest_lon.guess_bounds() + data = np.zeros((dest_lat.shape[0], dest_lon.shape[0])) + dest = iris.cube.Cube(data) + dest.add_dim_coord(dest_lat, 0) + dest.add_dim_coord(dest_lon, 1) + + res = regrid_area_weighted(src, dest) + self.assertArrayShapeStats(res, (40, 7), 285.550814, 15.190245) + + # The target cells straddling the gap between min and max source + # longitude should be masked. + expected_mask = np.zeros(res.shape) + expected_mask[:, 3] = 1 + assert np.array_equal(expected_mask, res.data.mask) + if __name__ == "__main__": tests.main() From c0814364d6da5a743f6c2042013fe8827bf42cd0 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 20 Oct 2023 14:15:44 +0100 Subject: [PATCH 12/21] add documentation, avoid unnecessary regrid calls --- lib/iris/analysis/_area_weighted.py | 16 +++++++++---- ..._area_weighted_rectilinear_src_and_grid.py | 23 ++++++++++++++++++- 2 files changed, 33 insertions(+), 6 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 0295ef6363..52628ba880 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -748,15 +748,21 @@ def _regrid_no_masks(data, weights, tgt_shape): def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): - unmasked = ~ma.getmaskarray(data) - weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) + data_shape = data.shape + if ma.is_masked(data): + unmasked = ~ma.getmaskarray(data) + weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) + else: + weight_sums = np.ones(tgt_shape + data_shape[2:]) mdtol = max(mdtol, 1e-8) tgt_mask = weight_sums > 1 - mdtol - # TODO: make this more efficient if oob_invalid: - inbound_sums = _regrid_no_masks(np.ones_like(data), weights, tgt_shape) + inbound_sums = _regrid_no_masks( + np.ones(data_shape[:2]), weights, tgt_shape + ) oob_mask = inbound_sums > 1 - 1e-8 - tgt_mask = tgt_mask * oob_mask + oob_slice = np.s_[:, :] + ((np.newaxis,) * len(data.shape[2:])) + tgt_mask = tgt_mask * oob_mask[oob_slice] masked_weight_sums = weight_sums * tgt_mask normalisations = np.ones_like(weight_sums) normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py index d892119207..9118ffcf8f 100644 --- a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py +++ b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py @@ -602,6 +602,20 @@ def test_circular_subset(self): @tests.skip_data def test_non_circular_subset(self): + """ + Test regridding behaviour when the source grid has circular latitude. + + This tests the specific case when the longitude coordinate of the + source grid has the `circular` attribute as `False` but otherwise spans + the full 360 degrees. + + Note: the previous behaviour was to always mask target cells when they + spanned the boundary of max/min longitude and `circular` was `False`, + however this has been changed so that such cells will only be masked + when there is a gap between max longitude and min longitude. In this + test these cells are expected to be unmasked and therefore the result + will be equal to the above test for circular longitudes. + """ src = iris.tests.stock.global_pp() src.coord("latitude").guess_bounds() src.coord("longitude").guess_bounds() @@ -621,11 +635,18 @@ def test_non_circular_subset(self): dest.add_dim_coord(dest_lon, 1) res = regrid_area_weighted(src, dest) - # TODO: justify this change in behaviour self.assertArrayShapeStats(res, (40, 7), 285.653960, 15.212710) @tests.skip_data def test__proper_non_circular_subset(self): + """ + Test regridding behaviour when the source grid has circular latitude. + + This tests the specific case when the longitude coordinate of the + source grid does not span the full 360 degrees. Target cells which span + the boundary of max/min longitude will contain a section which is out + of bounds from the source grid and are therefore expected to be masked. + """ src = iris.tests.stock.global_pp() src.coord("latitude").guess_bounds() src.coord("longitude").guess_bounds() From 2c65b126e91ec15e65549ee0e589ef50c10663dd Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 20 Oct 2023 14:27:43 +0100 Subject: [PATCH 13/21] remove unnecessary code, improve coverage --- lib/iris/analysis/_area_weighted.py | 181 +--------------------------- 1 file changed, 1 insertion(+), 180 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 52628ba880..5d6c71d79a 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -224,185 +224,6 @@ def _get_xy_coords(cube): return x_coord, y_coord -def _within_bounds(src_bounds, tgt_bounds, orderswap=False): - """ - Determine which target bounds lie within the extremes of the source bounds. - - Args: - - * src_bounds (ndarray): - An (n, 2) shaped array of monotonic contiguous source bounds. - * tgt_bounds (ndarray): - An (n, 2) shaped array corresponding to the target bounds. - - Kwargs: - - * orderswap (bool): - A Boolean indicating whether the target bounds are in descending order - (True). Defaults to False. - - Returns: - Boolean ndarray, indicating whether each target bound is within the - extremes of the source bounds. - - """ - min_bound = np.min(src_bounds) - 1e-14 - max_bound = np.max(src_bounds) + 1e-14 - - # Swap upper-lower is necessary. - if orderswap is True: - upper, lower = tgt_bounds.T - else: - lower, upper = tgt_bounds.T - - return ((lower <= max_bound) * (lower >= min_bound)) * ( - (upper <= max_bound) * (upper >= min_bound) - ) - - -def _cropped_bounds(bounds, lower, upper): - """ - Return a new bounds array and corresponding slice object (or indices) of - the original data array, resulting from cropping the provided bounds - between the specified lower and upper values. The bounds at the - extremities will be truncated so that they start and end with lower and - upper. - - This function will return an empty NumPy array and slice if there is no - overlap between the region covered by bounds and the region from lower to - upper. - - If lower > upper the resulting bounds may not be contiguous and the - indices object will be a tuple of indices rather than a slice object. - - Args: - - * bounds: - An (n, 2) shaped array of monotonic contiguous bounds. - * lower: - Lower bound at which to crop the bounds array. - * upper: - Upper bound at which to crop the bounds array. - - Returns: - A tuple of the new bounds array and the corresponding slice object or - indices from the zeroth axis of the original array. - - """ - reversed_flag = False - # Ensure order is increasing. - if bounds[0, 0] > bounds[-1, 0]: - # Reverse bounds - bounds = bounds[::-1, ::-1] - reversed_flag = True - - # Number of bounds. - n = bounds.shape[0] - - if lower <= upper: - if lower > bounds[-1, 1] or upper < bounds[0, 0]: - new_bounds = bounds[0:0] - indices = slice(0, 0) - else: - # A single region lower->upper. - if lower < bounds[0, 0]: - # Region extends below bounds so use first lower bound. - lindex = 0 - lower = bounds[0, 0] - else: - # Index of last lower bound less than or equal to lower. - lindex = np.nonzero(bounds[:, 0] <= lower)[0][-1] - if upper > bounds[-1, 1]: - # Region extends above bounds so use last upper bound. - uindex = n - 1 - upper = bounds[-1, 1] - else: - # Index of first upper bound greater than or equal to - # upper. - uindex = np.nonzero(bounds[:, 1] >= upper)[0][0] - # Extract the bounds in our region defined by lower->upper. - new_bounds = np.copy(bounds[lindex : (uindex + 1), :]) - # Replace first and last values with specified bounds. - new_bounds[0, 0] = lower - new_bounds[-1, 1] = upper - if reversed_flag: - indices = slice(n - (uindex + 1), n - lindex) - else: - indices = slice(lindex, uindex + 1) - else: - # Two regions [0]->upper, lower->[-1] - # [0]->upper - if upper < bounds[0, 0]: - # Region outside src bounds. - new_bounds_left = bounds[0:0] - indices_left = tuple() - slice_left = slice(0, 0) - else: - if upper > bounds[-1, 1]: - # Whole of bounds. - uindex = n - 1 - upper = bounds[-1, 1] - else: - # Index of first upper bound greater than or equal to upper. - uindex = np.nonzero(bounds[:, 1] >= upper)[0][0] - # Extract the bounds in our region defined by [0]->upper. - new_bounds_left = np.copy(bounds[0 : (uindex + 1), :]) - # Replace last value with specified bound. - new_bounds_left[-1, 1] = upper - if reversed_flag: - indices_left = tuple(range(n - (uindex + 1), n)) - slice_left = slice(n - (uindex + 1), n) - else: - indices_left = tuple(range(0, uindex + 1)) - slice_left = slice(0, uindex + 1) - # lower->[-1] - if lower > bounds[-1, 1]: - # Region is outside src bounds. - new_bounds_right = bounds[0:0] - indices_right = tuple() - slice_right = slice(0, 0) - else: - if lower < bounds[0, 0]: - # Whole of bounds. - lindex = 0 - lower = bounds[0, 0] - else: - # Index of last lower bound less than or equal to lower. - lindex = np.nonzero(bounds[:, 0] <= lower)[0][-1] - # Extract the bounds in our region defined by lower->[-1]. - new_bounds_right = np.copy(bounds[lindex:, :]) - # Replace first value with specified bound. - new_bounds_right[0, 0] = lower - if reversed_flag: - indices_right = tuple(range(0, n - lindex)) - slice_right = slice(0, n - lindex) - else: - indices_right = tuple(range(lindex, n)) - slice_right = slice(lindex, None) - - if reversed_flag: - # Flip everything around. - indices_left, indices_right = indices_right, indices_left - slice_left, slice_right = slice_right, slice_left - - # Combine regions. - new_bounds = np.concatenate((new_bounds_left, new_bounds_right)) - # Use slices if possible, but if we have two regions use indices. - if indices_left and indices_right: - indices = indices_left + indices_right - elif indices_left: - indices = slice_left - elif indices_right: - indices = slice_right - else: - indices = slice(0, 0) - - if reversed_flag: - new_bounds = new_bounds[::-1, ::-1] - - return new_bounds, indices - - def _get_bounds_in_units(coord, units, dtype): """Return a copy of coord's bounds in the specified units and dtype.""" # The bounds are cast to dtype before conversion to prevent issues when @@ -674,7 +495,7 @@ def _get_coord_to_coord_matrix( ) floor = tgt_bounds[i + 1] i += 1 - elif tgt_bounds[i + 1] < src_bounds[j + 1]: + elif tgt_bounds[i + 1] == src_bounds[j + 1]: weight = (tgt_bounds[i + 1] - floor) / ( tgt_bounds[i + 1] - tgt_bounds[i] ) From dc665b6e3c8c1b0bf4db68e66b2d5caf52747f3e Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Fri, 20 Oct 2023 17:28:15 +0100 Subject: [PATCH 14/21] add docstrings --- lib/iris/analysis/_area_weighted.py | 50 ++++++++++++++++++++++++----- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 5d6c71d79a..ebbeaf8ea7 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -225,7 +225,11 @@ def _get_xy_coords(cube): def _get_bounds_in_units(coord, units, dtype): - """Return a copy of coord's bounds in the specified units and dtype.""" + """ + Return a copy of coord's bounds in the specified units and dtype. + + Return as contiguous bounds. + """ # The bounds are cast to dtype before conversion to prevent issues when # mixing float32 and float64 types. return coord.units.convert( @@ -335,6 +339,7 @@ def _calculate_regrid_area_weighted_weights( spherical, modulus=None, ): + """Return weights matrix to be used in regridding.""" src_shape = (len(src_x_bounds) - 1, len(src_y_bounds) - 1) tgt_shape = (len(grid_x_bounds) - 1, len(grid_y_bounds) - 1) @@ -348,10 +353,10 @@ def _calculate_regrid_area_weighted_weights( src_y_bounds = np.sin(src_y_bounds) grid_y_bounds = np.sin(grid_y_bounds) - x_info = _get_coord_to_coord_matrix( + x_info = _get_coord_to_coord_matrix_info( src_x_bounds, grid_x_bounds, circular=spherical, mod=modulus ) - y_info = _get_coord_to_coord_matrix(src_y_bounds, grid_y_bounds) + y_info = _get_coord_to_coord_matrix_info(src_y_bounds, grid_y_bounds) weights_matrix = _combine_xy_weights( x_info, y_info, src_shape, tgt_shape ) @@ -457,9 +462,18 @@ def regrid_callback(*args, **kwargs): return new_cube -def _get_coord_to_coord_matrix( +def _get_coord_to_coord_matrix_info( src_bounds, tgt_bounds, circular=False, mod=None ): + """ + First part of weight calculation. + + Calculate the weights contribution from a pair single pair of + coordinate bounds. Search for pairs of overlapping source and + target bounds and associate weights with them. + + Note: this assumes that the bounds are monotonic. + """ m = len(tgt_bounds) - 1 n = len(src_bounds) - 1 @@ -531,6 +545,13 @@ def _get_coord_to_coord_matrix( def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): + """ + Second part of weight calculation. + + Combine the weights contributions from a pair both pairs of coordinate + bounds (i.e. the source/target pairs for the x and y coords). + Return the result as a sparse array. + """ x_src, y_src = src_shape x_tgt, y_tgt = tgt_shape src_size = x_src * y_src @@ -553,7 +574,12 @@ def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): return combined_weights -def _regrid_no_masks(data, weights, tgt_shape): +def _standard_regrid_no_masks(data, weights, tgt_shape): + """ + Regrid unmasked data to an unmasked result. + + Assumes that the first two dimensions are the x-y grid. + """ extra_dims = len(data.shape) > 2 if extra_dims: extra_shape = data.shape[2:] @@ -569,16 +595,21 @@ def _regrid_no_masks(data, weights, tgt_shape): def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): + """ + Regrid data and handle masks. + + Assumes that the first two dimensions are the x-y grid. + """ data_shape = data.shape if ma.is_masked(data): unmasked = ~ma.getmaskarray(data) - weight_sums = _regrid_no_masks(unmasked, weights, tgt_shape) + weight_sums = _standard_regrid_no_masks(unmasked, weights, tgt_shape) else: weight_sums = np.ones(tgt_shape + data_shape[2:]) mdtol = max(mdtol, 1e-8) tgt_mask = weight_sums > 1 - mdtol if oob_invalid: - inbound_sums = _regrid_no_masks( + inbound_sums = _standard_regrid_no_masks( np.ones(data_shape[:2]), weights, tgt_shape ) oob_mask = inbound_sums > 1 - 1e-8 @@ -600,13 +631,16 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): # dtype when necessary. dtype = np.promote_types(data.dtype, np.float16) - result = _regrid_no_masks(ma.getdata(data), weights, tgt_shape) + result = _standard_regrid_no_masks( + ma.filled(data, 0.0), weights, tgt_shape + ) result = result * normalisations result = result.astype(dtype) return result def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): + """Regrid data, handling masks and dimensions.""" # TODO: check that this is equivalent to the reordering in curvilinear regridding! if x_dim is None: x_none = True From a228281c4d6ee207dc21742ee9d58d51384b036b Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 23 Oct 2023 16:38:41 +0100 Subject: [PATCH 15/21] minor fixes --- lib/iris/analysis/_area_weighted.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index ebbeaf8ea7..2c2a2504e4 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -548,7 +548,7 @@ def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): """ Second part of weight calculation. - Combine the weights contributions from a pair both pairs of coordinate + Combine the weights contributions from both pairs of coordinate bounds (i.e. the source/target pairs for the x and y coords). Return the result as a sparse array. """ @@ -608,11 +608,14 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): weight_sums = np.ones(tgt_shape + data_shape[2:]) mdtol = max(mdtol, 1e-8) tgt_mask = weight_sums > 1 - mdtol - if oob_invalid: + if oob_invalid or not ma.is_masked(data): inbound_sums = _standard_regrid_no_masks( np.ones(data_shape[:2]), weights, tgt_shape ) - oob_mask = inbound_sums > 1 - 1e-8 + if oob_invalid: + oob_mask = inbound_sums > 1 - 1e-8 + else: + oob_mask = inbound_sums > 1 - mdtol oob_slice = np.s_[:, :] + ((np.newaxis,) * len(data.shape[2:])) tgt_mask = tgt_mask * oob_mask[oob_slice] masked_weight_sums = weight_sums * tgt_mask From 31ef5934caa4123a9637e14cbe4a9bd2dd0d6d78 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 25 Oct 2023 12:40:54 +0100 Subject: [PATCH 16/21] change dimension ordering to match curvilinear regridding --- lib/iris/analysis/_area_weighted.py | 52 +++++++++++++++-------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 2c2a2504e4..5b0aacfd47 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -321,7 +321,6 @@ def _regrid_area_weighted_rectilinear_src_and_grid__prepare( grid_x_bounds = _get_bounds_in_units(grid_x, x_units, dtype) grid_y_bounds = _get_bounds_in_units(grid_y, y_units, dtype) - # TODO: consider removing this. # Create 2d meshgrids as required by _create_cube func. meshgrid_x, meshgrid_y = _meshgrid(grid_x.points, grid_y.points) @@ -582,13 +581,13 @@ def _standard_regrid_no_masks(data, weights, tgt_shape): """ extra_dims = len(data.shape) > 2 if extra_dims: - extra_shape = data.shape[2:] - result = data.reshape(np.prod(data.shape[:2]), -1) + extra_shape = data.shape[:-2] + result = data.reshape(-1, np.prod(data.shape[-2:])) else: result = data.flatten() - result = weights @ result + result = result @ weights.T if extra_dims: - result = result.reshape(*(tgt_shape + extra_shape)) + result = result.reshape(*(extra_shape + tgt_shape)) else: result = result.reshape(*tgt_shape) return result @@ -605,18 +604,18 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): unmasked = ~ma.getmaskarray(data) weight_sums = _standard_regrid_no_masks(unmasked, weights, tgt_shape) else: - weight_sums = np.ones(tgt_shape + data_shape[2:]) + weight_sums = np.ones(data_shape[:-2] + tgt_shape) mdtol = max(mdtol, 1e-8) tgt_mask = weight_sums > 1 - mdtol if oob_invalid or not ma.is_masked(data): inbound_sums = _standard_regrid_no_masks( - np.ones(data_shape[:2]), weights, tgt_shape + np.ones(data_shape[-2:]), weights, tgt_shape ) if oob_invalid: oob_mask = inbound_sums > 1 - 1e-8 else: oob_mask = inbound_sums > 1 - mdtol - oob_slice = np.s_[:, :] + ((np.newaxis,) * len(data.shape[2:])) + oob_slice = ((np.newaxis,) * len(data.shape[:-2])) + np.s_[:, :] tgt_mask = tgt_mask * oob_mask[oob_slice] masked_weight_sums = weight_sums * tgt_mask normalisations = np.ones_like(weight_sums) @@ -644,32 +643,35 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): """Regrid data, handling masks and dimensions.""" - # TODO: check that this is equivalent to the reordering in curvilinear regridding! + + # Handle scalar coordinates. + # Note: scalar source coordinates are only handled when their + # corresponding target coordinate is also scalar. if x_dim is None: x_none = True - data = np.expand_dims(data, 0) - x_dim = 0 - if y_dim is not None: - y_dim += 1 + data = np.expand_dims(data, -1) + x_dim = -1 else: x_none = False if y_dim is None: y_none = True - data = np.expand_dims(data, 0) - y_dim = 0 - x_dim += 1 + data = np.expand_dims(data, -1) + y_dim = -1 + if x_none: + x_dim = -2 else: y_none = False - data = np.moveaxis(data, [x_dim, y_dim], [0, 1]) + + # TODO: decide if standard regridding should expect (x,y) or (y,x) ordering + # Standard regridding expects the last two dimensions to belong + # to the x and y coordinate and will output as such. + # Axes are moved to account for an arbitrary dimension ordering. + data = np.moveaxis(data, [x_dim, y_dim], [-2, -1]) result = _standard_regrid(data, weights, tgt_shape, mdtol) - # Ensure consistent ordering with old behaviour - if ma.isMaskedArray(result): - result = ma.array(result, order="F") - else: - result = np.array(result, order="F") - result = np.moveaxis(result, [0, 1], [x_dim, y_dim]) + result = np.moveaxis(result, [-2, -1], [x_dim, y_dim]) + if y_none: - result = result[0] + result = np.squeeze(result, axis=-1) if x_none: - result = result[0] + result = np.squeeze(result, axis=-1) return result From 90130ccc054b336339efeb37458fcaeaf566aa39 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Tue, 7 Nov 2023 15:57:46 +0000 Subject: [PATCH 17/21] make x-y ordering more consistent with existing implementations --- lib/iris/analysis/_area_weighted.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 5b0aacfd47..d4a718204c 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -403,7 +403,7 @@ def _regrid_area_weighted_rectilinear_src_and_grid__perform( weights, ) = regrid_info - tgt_shape = (len(grid_x.points), len(grid_y.points)) + tgt_shape = (len(grid_y.points), len(grid_x.points)) # Calculate new data array for regridded cube. regrid = functools.partial( @@ -558,13 +558,13 @@ def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): x_weight, x_rows, x_cols = x_info y_weight, y_rows, y_cols = y_info - xy_weight = x_weight[:, np.newaxis] * y_weight[np.newaxis, :] + xy_weight = y_weight[:, np.newaxis] * x_weight[np.newaxis, :] xy_weight = xy_weight.flatten() - xy_rows = (x_rows[:, np.newaxis] * y_tgt) + y_rows[np.newaxis, :] + xy_rows = (y_rows[:, np.newaxis] * x_tgt) + x_rows[np.newaxis, :] xy_rows = xy_rows.flatten() - xy_cols = (x_cols[:, np.newaxis] * y_src) + y_cols[np.newaxis, :] + xy_cols = (y_cols[:, np.newaxis] * x_src) + x_cols[np.newaxis, :] xy_cols = xy_cols.flatten() combined_weights = csr_array( @@ -662,13 +662,12 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): else: y_none = False - # TODO: decide if standard regridding should expect (x,y) or (y,x) ordering # Standard regridding expects the last two dimensions to belong - # to the x and y coordinate and will output as such. + # to the y and x coordinate and will output as such. # Axes are moved to account for an arbitrary dimension ordering. - data = np.moveaxis(data, [x_dim, y_dim], [-2, -1]) + data = np.moveaxis(data, [y_dim, x_dim], [-2, -1]) result = _standard_regrid(data, weights, tgt_shape, mdtol) - result = np.moveaxis(result, [-2, -1], [x_dim, y_dim]) + result = np.moveaxis(result, [-2, -1], [y_dim, x_dim]) if y_none: result = np.squeeze(result, axis=-1) From 86960a00648268fe5dd9a24e668bf0c0fbf5498f Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 8 Nov 2023 14:41:25 +0000 Subject: [PATCH 18/21] add documentation, tidy code --- lib/iris/analysis/_area_weighted.py | 45 +++++++++++++------ .../test_AreaWeightedRegridder.py | 3 +- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index d4a718204c..4bf6dedbb2 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -579,17 +579,17 @@ def _standard_regrid_no_masks(data, weights, tgt_shape): Assumes that the first two dimensions are the x-y grid. """ - extra_dims = len(data.shape) > 2 - if extra_dims: - extra_shape = data.shape[:-2] - result = data.reshape(-1, np.prod(data.shape[-2:])) - else: - result = data.flatten() - result = result @ weights.T - if extra_dims: - result = result.reshape(*(extra_shape + tgt_shape)) - else: - result = result.reshape(*tgt_shape) + # Reshape data to a form suitable for matrix multiplication. + extra_shape = data.shape[:-2] + data = data.reshape(-1, np.prod(data.shape[-2:])) + + # Apply regridding weights. + # The order of matrix multiplication is chosen to be consistent + # with existing regridding code. + result = data @ weights.T + + # Reshape result to a suitable form. + result = result.reshape(*(extra_shape + tgt_shape)) return result @@ -602,26 +602,41 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): data_shape = data.shape if ma.is_masked(data): unmasked = ~ma.getmaskarray(data) + # Calculate contribution from unmasked sources to each target point. weight_sums = _standard_regrid_no_masks(unmasked, weights, tgt_shape) else: + # If there are no masked points then all contributions will be + # from unmasked sources, so we can skip this calculation weight_sums = np.ones(data_shape[:-2] + tgt_shape) mdtol = max(mdtol, 1e-8) tgt_mask = weight_sums > 1 - mdtol + # If out of bounds sources are treated the same as masked sources this + # will already have been calculated above, so we can skip this calculation. if oob_invalid or not ma.is_masked(data): + # Calculate the proportion of each target cell which is covered by the + # source. For the sake of efficiency, this is calculated for a 2D slice + # which is then broadcast. inbound_sums = _standard_regrid_no_masks( np.ones(data_shape[-2:]), weights, tgt_shape ) if oob_invalid: + # Legacy behaviour, if the full area of a target cell does not lie + # in bounds it will be masked. oob_mask = inbound_sums > 1 - 1e-8 else: oob_mask = inbound_sums > 1 - mdtol + # Broadcast the mask to the shape of the full array oob_slice = ((np.newaxis,) * len(data.shape[:-2])) + np.s_[:, :] tgt_mask = tgt_mask * oob_mask[oob_slice] - masked_weight_sums = weight_sums * tgt_mask + + # Calculate normalisations. normalisations = np.ones_like(weight_sums) - normalisations[tgt_mask] /= masked_weight_sums[tgt_mask] + normalisations *= tgt_mask + normalisations[tgt_mask] /= weight_sums[tgt_mask] + # Mask points in the result. if ma.isMaskedArray(data): + # If the source is masked, the result should have a similar mask. fill_value = data.fill_value normalisations = ma.array( normalisations, mask=~tgt_mask, fill_value=fill_value @@ -633,9 +648,11 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): # dtype when necessary. dtype = np.promote_types(data.dtype, np.float16) + # Perform regridding on unmasked data. result = _standard_regrid_no_masks( ma.filled(data, 0.0), weights, tgt_shape ) + # Apply normalisations and masks to the regridded data. result = result * normalisations result = result.astype(dtype) return result @@ -658,7 +675,7 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): data = np.expand_dims(data, -1) y_dim = -1 if x_none: - x_dim = -2 + y_dim = -2 else: y_none = False diff --git a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py index c143407bb6..6a0c6afdc8 100644 --- a/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py +++ b/lib/iris/tests/unit/analysis/area_weighted/test_AreaWeightedRegridder.py @@ -51,8 +51,7 @@ def check_mdtol(self, mdtol=None): _regrid_info = _regrid_area_weighted_rectilinear_src_and_grid__prepare( src_grid, target_grid ) - # TODO: understand and write an equivalent test. - # self.assertEqual(len(_regrid_info), 10) + self.assertEqual(len(_regrid_info), 9) with mock.patch( "iris.analysis._area_weighted." "_regrid_area_weighted_rectilinear_src_and_grid__prepare", From fafaadeb11df765c0c153bbc318240fb75795cec Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 8 Nov 2023 16:05:11 +0000 Subject: [PATCH 19/21] add documentation, reset test --- lib/iris/analysis/_area_weighted.py | 17 ++++++++++++++--- ...id_area_weighted_rectilinear_src_and_grid.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 4bf6dedbb2..b68712b575 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -558,15 +558,22 @@ def _combine_xy_weights(x_info, y_info, src_shape, tgt_shape): x_weight, x_rows, x_cols = x_info y_weight, y_rows, y_cols = y_info + # Regridding weights will be applied to a flattened (y, x) array. + # Weights and indices are constructed in a way to account for this. + # Weights of the combined matrix are constructed by broadcasting + # the x_weights and y_weights. The resulting array contains every + # combination of x weight and y weight. Then we flatten this array. xy_weight = y_weight[:, np.newaxis] * x_weight[np.newaxis, :] xy_weight = xy_weight.flatten() + # Given the x index and y index associated with a weight, calculate + # the equivalent index in the flattened (y, x) array. xy_rows = (y_rows[:, np.newaxis] * x_tgt) + x_rows[np.newaxis, :] xy_rows = xy_rows.flatten() - xy_cols = (y_cols[:, np.newaxis] * x_src) + x_cols[np.newaxis, :] xy_cols = xy_cols.flatten() + # Create a sparse matrix for efficient weight application. combined_weights = csr_array( (xy_weight, (xy_rows, xy_cols)), shape=(tgt_size, src_size) ) @@ -593,12 +600,17 @@ def _standard_regrid_no_masks(data, weights, tgt_shape): return result -def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): +def _standard_regrid(data, weights, tgt_shape, mdtol): """ Regrid data and handle masks. Assumes that the first two dimensions are the x-y grid. """ + # This is set to keep consistent with legacy behaviour. + # This is likely to become switchable in the future, see: + # https://github.com/SciTools/iris/issues/5461 + oob_invalid = True + data_shape = data.shape if ma.is_masked(data): unmasked = ~ma.getmaskarray(data) @@ -660,7 +672,6 @@ def _standard_regrid(data, weights, tgt_shape, mdtol, oob_invalid=True): def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): """Regrid data, handling masks and dimensions.""" - # Handle scalar coordinates. # Note: scalar source coordinates are only handled when their # corresponding target coordinate is also scalar. diff --git a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py index 9118ffcf8f..7cef33b11e 100644 --- a/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py +++ b/lib/iris/tests/experimental/regrid/test_regrid_area_weighted_rectilinear_src_and_grid.py @@ -263,7 +263,7 @@ def test_equal_area_numbers(self): dest = _subsampled_grid(src, 2, 3) res = regrid_area_weighted(src, dest) expected_val_left = np.mean(src.data[:, 0:2]) - self.assertAlmostEqual(expected_val_left, res.data[0]) + self.assertEqual(expected_val_left, res.data[0]) expected_val_right = np.mean(src.data[:, 2:4]) self.assertAlmostEqual(expected_val_right, res.data[1]) From 53969183de7a3c28f424f8e8040dca963c01b857 Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Wed, 8 Nov 2023 16:53:20 +0000 Subject: [PATCH 20/21] add documentation --- lib/iris/analysis/_area_weighted.py | 52 ++++++++++++++++++++--------- 1 file changed, 36 insertions(+), 16 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index b68712b575..7afbae40fd 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -473,9 +473,11 @@ def _get_coord_to_coord_matrix_info( Note: this assumes that the bounds are monotonic. """ + # Calculate the number of cells represented by the bounds. m = len(tgt_bounds) - 1 n = len(src_bounds) - 1 + # Ensure bounds are strictly increasing. src_decreasing = src_bounds[0] > src_bounds[1] tgt_decreasing = tgt_bounds[0] > tgt_bounds[1] if src_decreasing: @@ -484,6 +486,11 @@ def _get_coord_to_coord_matrix_info( tgt_bounds = tgt_bounds[::-1] if circular: + # For circular coordinates (e.g. longitude) account for source and + # target bounds which span different ranges (e.g. (-180, 180) vs + # (0, 360)). We ensure that all possible overlaps between source and + # target bounds are accounted for by including two copies of the + # source bounds, shifted appropriately by the modulus. adjust = (tgt_bounds.min() - src_bounds.min()) // mod src_bounds = src_bounds + (mod * adjust) src_bounds = np.append(src_bounds, src_bounds + mod) @@ -491,6 +498,9 @@ def _get_coord_to_coord_matrix_info( else: nn = n + # Before iterating through pairs of overlapping bounds, find an + # appropriate place to start iteration. Note that this assumes that + # the bounds are increasing. i = max(np.searchsorted(tgt_bounds, src_bounds[0], side="right") - 1, 0) j = max(np.searchsorted(src_bounds, tgt_bounds[0], side="right") - 1, 0) @@ -498,48 +508,58 @@ def _get_coord_to_coord_matrix_info( rows = [] cols = [] + # Iterate through overlapping cells in the source and target bounds. + # For the sake of calculations, we keep track of the minimum value of + # the intersection of each cell. floor = max(tgt_bounds[i], src_bounds[j]) while i < m and j < nn: + # Record the current indices. rows.append(i) cols.append(j) + + # Determine the next indices and floor. if tgt_bounds[i + 1] < src_bounds[j + 1]: - weight = (tgt_bounds[i + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] - ) - floor = tgt_bounds[i + 1] - i += 1 + next_floor = tgt_bounds[i + 1] + next_i = i + 1 elif tgt_bounds[i + 1] == src_bounds[j + 1]: - weight = (tgt_bounds[i + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] - ) - floor = tgt_bounds[i + 1] - i += 1 + next_floor = tgt_bounds[i + 1] + next_i = i + 1 j += 1 else: - weight = (src_bounds[j + 1] - floor) / ( - tgt_bounds[i + 1] - tgt_bounds[i] - ) - floor = src_bounds[j + 1] + next_floor = src_bounds[j + 1] + next_i = i j += 1 + + # Calculate and record the weight for the current overlapping cells. + weight = (next_floor - floor) / (tgt_bounds[i + 1] - tgt_bounds[i]) data.append(weight) + # Update indices and floor + i = next_i + floor = next_floor + data = np.array(data) rows = np.array(rows) cols = np.array(cols) if circular: - # remove out of bounds points + # Remove out of bounds points. When the source bounds were duplicated + # an "out of bounds" cell was introduced between the two copies. oob = np.where(cols == n) data = np.delete(data, oob) rows = np.delete(rows, oob) cols = np.delete(cols, oob) - # wrap indices + + # Wrap indices. Since we duplicated the source bounds there may be + # indices which are greater than n which will need to be corrected. cols = cols % (n + 1) + # Correct indices which were flipped due to reversing decreasing bounds. if src_decreasing: cols = n - cols - 1 if tgt_decreasing: rows = m - rows - 1 + return data, rows, cols From 7515c50440950b6cfa85be7e410e72de14fa4c7d Mon Sep 17 00:00:00 2001 From: "stephen.worsley" Date: Mon, 20 Nov 2023 15:40:42 +0000 Subject: [PATCH 21/21] address review comments --- lib/iris/analysis/_area_weighted.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/lib/iris/analysis/_area_weighted.py b/lib/iris/analysis/_area_weighted.py index 7afbae40fd..cf3ce407d4 100644 --- a/lib/iris/analysis/_area_weighted.py +++ b/lib/iris/analysis/_area_weighted.py @@ -467,7 +467,7 @@ def _get_coord_to_coord_matrix_info( """ First part of weight calculation. - Calculate the weights contribution from a pair single pair of + Calculate the weights contribution from a single pair of coordinate bounds. Search for pairs of overlapping source and target bounds and associate weights with them. @@ -656,14 +656,16 @@ def _standard_regrid(data, weights, tgt_shape, mdtol): # in bounds it will be masked. oob_mask = inbound_sums > 1 - 1e-8 else: + # Note: this code is currently inaccessible. This code exists to lay + # the groundwork for future work which will make out of bounds + # behaviour switchable. oob_mask = inbound_sums > 1 - mdtol # Broadcast the mask to the shape of the full array oob_slice = ((np.newaxis,) * len(data.shape[:-2])) + np.s_[:, :] tgt_mask = tgt_mask * oob_mask[oob_slice] # Calculate normalisations. - normalisations = np.ones_like(weight_sums) - normalisations *= tgt_mask + normalisations = tgt_mask.astype(weight_sums.dtype) normalisations[tgt_mask] /= weight_sums[tgt_mask] # Mask points in the result. @@ -695,20 +697,17 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): # Handle scalar coordinates. # Note: scalar source coordinates are only handled when their # corresponding target coordinate is also scalar. + num_scalar_dims = 0 if x_dim is None: - x_none = True + num_scalar_dims += 1 data = np.expand_dims(data, -1) x_dim = -1 - else: - x_none = False if y_dim is None: - y_none = True + num_scalar_dims += 1 data = np.expand_dims(data, -1) y_dim = -1 - if x_none: + if num_scalar_dims == 2: y_dim = -2 - else: - y_none = False # Standard regridding expects the last two dimensions to belong # to the y and x coordinate and will output as such. @@ -717,8 +716,6 @@ def _regrid_along_dims(data, x_dim, y_dim, weights, tgt_shape, mdtol): result = _standard_regrid(data, weights, tgt_shape, mdtol) result = np.moveaxis(result, [-2, -1], [y_dim, x_dim]) - if y_none: - result = np.squeeze(result, axis=-1) - if x_none: + for _ in range(num_scalar_dims): result = np.squeeze(result, axis=-1) return result