diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 158902581..07dddc79e 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -1115,11 +1115,64 @@ def _extract_lons_lats(self, lonslats): return lons, lats -def invproj(data_x, data_y, proj_dict): +def _invproj(data_x, data_y, proj_dict): """Perform inverse projection.""" # XXX: does pyproj copy arrays? What can we do so it doesn't? target_proj = Proj(proj_dict) - return np.dstack(target_proj(data_x, data_y, inverse=True)) + lon, lat = target_proj(data_x, data_y, inverse=True) + return np.stack([lon.astype(data_x.dtype), lat.astype(data_y.dtype)]) + + +def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_upper_left_y, + chunks, dtype, block_info=None): + start_y_idx = block_info[None]["array-location"][1][0] + end_y_idx = block_info[None]["array-location"][1][1] + start_x_idx = block_info[None]["array-location"][2][0] + end_x_idx = block_info[None]["array-location"][2][1] + dtype = block_info[None]["dtype"] + x, y = _generate_1d_proj_vectors((start_x_idx, end_x_idx), + (start_y_idx, end_y_idx), + (pixel_size_x, pixel_size_y), + (pixel_upper_left_x, pixel_upper_left_y), + dtype) + x_2d, y_2d = np.meshgrid(x, y) + res = np.stack([x_2d, y_2d]) + return res + + +def _generate_1d_proj_vectors(col_range, row_range, + pixel_size_xy, offset_xy, + dtype, chunks=None): + x_kwargs, y_kwargs, arange = _get_vector_arange_args(dtype, chunks) + x = arange(*col_range, **x_kwargs) * pixel_size_xy[0] + offset_xy[0] + y = arange(*row_range, **y_kwargs) * -pixel_size_xy[1] + offset_xy[1] + return x, y + + +def _get_vector_arange_args(dtype, chunks): + x_kwargs = {} + y_kwargs = {} + + y_chunks, x_chunks = _chunks_to_yx_chunks(chunks) + if x_chunks is not None or y_chunks is not None: + # use dask functions instead of numpy + from dask.array import arange + x_kwargs = {'chunks': x_chunks} + y_kwargs = {'chunks': y_chunks} + else: + arange = np.arange + x_kwargs['dtype'] = dtype + y_kwargs['dtype'] = dtype + return x_kwargs, y_kwargs, arange + + +def _chunks_to_yx_chunks(chunks): + if chunks is not None and not isinstance(chunks, int): + y_chunks = chunks[0] + x_chunks = chunks[1] + else: + y_chunks = x_chunks = chunks + return y_chunks, x_chunks class _ProjectionDefinition(BaseDefinition): @@ -2040,15 +2093,10 @@ def get_lonlat(self, row, col): @staticmethod def _do_rotation(xspan, yspan, rot_deg=0): """Apply a rotation factor to a matrix of points.""" - if hasattr(xspan, 'chunks'): - # we were given dask arrays, use dask functions - import dask.array as numpy - else: - numpy = np - rot_rad = numpy.radians(rot_deg) - rot_mat = numpy.array([[np.cos(rot_rad), np.sin(rot_rad)], [-np.sin(rot_rad), np.cos(rot_rad)]]) - x, y = numpy.meshgrid(xspan, yspan) - return numpy.einsum('ji, mni -> jmn', rot_mat, numpy.dstack([x, y])) + rot_rad = np.radians(rot_deg) + rot_mat = np.array([[np.cos(rot_rad), np.sin(rot_rad)], [-np.sin(rot_rad), np.cos(rot_rad)]]) + x, y = np.meshgrid(xspan, yspan) + return np.einsum('ji, mni -> jmn', rot_mat, np.dstack([x, y])) def get_proj_vectors_dask(self, chunks=None, dtype=None): """Get projection vectors.""" @@ -2060,32 +2108,16 @@ def get_proj_vectors_dask(self, chunks=None, dtype=None): def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): """Get 1D projection coordinates.""" - x_kwargs = {} - y_kwargs = {} - - if chunks is not None and not isinstance(chunks, int): - y_chunks = chunks[0] - x_chunks = chunks[1] - else: - y_chunks = x_chunks = chunks - - if x_chunks is not None or y_chunks is not None: - # use dask functions instead of numpy - from dask.array import arange - x_kwargs = {'chunks': x_chunks} - y_kwargs = {'chunks': y_chunks} - else: - arange = np.arange if check_rotation and self.rotation != 0: warnings.warn("Projection vectors will not be accurate because rotation is not 0", RuntimeWarning) if dtype is None: dtype = self.dtype - x_kwargs['dtype'] = dtype - y_kwargs['dtype'] = dtype - - target_x = arange(self.width, **x_kwargs) * self.pixel_size_x + self.pixel_upper_left[0] - target_y = arange(self.height, **y_kwargs) * -self.pixel_size_y + self.pixel_upper_left[1] - return target_x, target_y + x, y = _generate_1d_proj_vectors((0, self.width), + (0, self.height), + (self.pixel_size_x, self.pixel_size_y), + (self.pixel_upper_left[0], self.pixel_upper_left[1]), + dtype, chunks=chunks) + return x, y def get_proj_vectors(self, dtype=None, chunks=None): """Calculate 1D projection coordinates for the X and Y dimension. @@ -2139,24 +2171,67 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): Removed 'cache' keyword argument and add 'chunks' for creating dask arrays. """ + if self.rotation != 0 and chunks is not None: + raise ValueError("'rotation' is not supported with dask operations.") + if dtype is None: + dtype = self.dtype + y_slice, x_slice = self._get_yx_data_slice(data_slice) + if chunks is not None: + target_x, target_y = self._proj_coords_dask(chunks, dtype) + if y_slice is not None: + target_x = target_x[y_slice, x_slice] + target_y = target_y[y_slice, x_slice] + return target_x, target_y + target_x, target_y = self._get_proj_vectors(dtype=dtype, check_rotation=False, chunks=chunks) - if data_slice is not None and isinstance(data_slice, slice): - target_y = target_y[data_slice] - elif data_slice is not None: - target_y = target_y[data_slice[0]] - target_x = target_x[data_slice[1]] + if y_slice is not None: + target_y = target_y[y_slice] + if x_slice is not None: + target_x = target_x[x_slice] if self.rotation != 0: res = self._do_rotation(target_x, target_y, self.rotation) target_x, target_y = res[0, :, :], res[1, :, :] - elif chunks is not None: - import dask.array as da - target_x, target_y = da.meshgrid(target_x, target_y) else: target_x, target_y = np.meshgrid(target_x, target_y) return target_x, target_y + @staticmethod + def _get_yx_data_slice(data_slice): + if data_slice is not None and isinstance(data_slice, slice): + return data_slice, slice(None, None, None) + elif data_slice is not None: + return data_slice[0], data_slice[1] + return None, None + + def _proj_coords_dask(self, chunks, dtype): + """Generate 2D x and y coordinate arrays. + + This is a separate function because it allows dask to optimize and + separate the individual 2D chunks of coordinates. Using the basic + numpy form of these calculations produces an unnecessary + relationship between the "arange" 1D projection vectors and every + 2D coordinate chunk. This makes it difficult for dask to schedule + 2D chunks in an optimal way. + + """ + y_chunks, x_chunks = _chunks_to_yx_chunks(chunks) + norm_y_chunks, norm_x_chunks = da.core.normalize_chunks((y_chunks, x_chunks), self.shape, dtype=dtype) + # We must provide `chunks` and `dtype` as passed arguments to ensure + # the returned array has a unique dask name + # See: https://github.com/dask/dask/issues/8450 + res = da.map_blocks(_generate_2d_coords, + self.pixel_size_x, self.pixel_size_y, + self.pixel_upper_left[0], self.pixel_upper_left[1], + chunks, dtype, + chunks=((2,), norm_y_chunks, norm_x_chunks), + meta=np.array((), dtype=dtype), + dtype=dtype, + ) + target_x, target_y = res[0], res[1] + return target_x, target_y + @property def projection_x_coords(self): """Return projection X coordinates.""" @@ -2207,7 +2282,8 @@ def get_lonlats(self, nprocs=None, data_slice=None, cache=False, dtype=None, chu data_slice : slice object, optional Calculate only coordinates for specified slice cache : bool, optional - Store result the result. Requires data_slice to be None + Store the result internally for later reuse. Requires data_slice + to be None. dtype : numpy.dtype, optional Data type of the returned arrays chunks: int or tuple, optional @@ -2245,10 +2321,13 @@ def get_lonlats(self, nprocs=None, data_slice=None, cache=False, dtype=None, chu if hasattr(target_x, 'chunks'): # we are using dask arrays, map blocks to th from dask.array import map_blocks - res = map_blocks(invproj, target_x, target_y, - chunks=(target_x.chunks[0], target_x.chunks[1], 2), - new_axis=[2], proj_dict=self.crs_wkt).astype(dtype) - return res[:, :, 0], res[:, :, 1] + res = map_blocks(_invproj, target_x, target_y, + chunks=(2,) + target_x.chunks, + meta=np.array((), dtype=target_x.dtype), + dtype=target_x.dtype, + new_axis=[0], proj_dict=self.crs_wkt) + lons, lats = res[0], res[1] + return lons, lats if nprocs > 1: target_proj = Proj_MP(self.crs) diff --git a/pyresample/test/test_geometry.py b/pyresample/test/test_geometry.py index 46e402ac4..b3d0b9202 100644 --- a/pyresample/test/test_geometry.py +++ b/pyresample/test/test_geometry.py @@ -982,6 +982,11 @@ def test_get_proj_coords_dask(self): area_def = get_area_def(area_id, area_name, proj_id, proj_dict, x_size, y_size, area_extent) xcoord, ycoord = area_def.get_proj_coords(chunks=4096) + # make sure different chunk size provides a different dask name + xcoord2, ycoord2 = area_def.get_proj_coords(chunks=2048) + assert xcoord2.name != xcoord.name + assert ycoord2.name != ycoord.name + xcoord = xcoord.compute() ycoord = ycoord.compute() self.assertTrue(np.allclose(xcoord[0, :], @@ -2048,19 +2053,21 @@ def test_get_lonlats(self): np.testing.assert_allclose(lats[464:, :], lats1) # check that get_lonlats with chunks definition doesn't cause errors and output arrays are equal - # too many chunks - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((200, 264, 464), (5570,))) - # too few chunks - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464,), (5568,))) - # right amount of chunks, same shape - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 464), (5568,))) + with pytest.raises(ValueError): + # too many chunks + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((200, 264, 464), (5570,))) # right amount of chunks, different shape _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 470), (5568,))) - # only one set of chunks in a tuple - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=(464, 5568)) # only one chunk value _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=464) + # only one set of chunks in a tuple + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=(464, 5568)) + # too few chunks + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464,), (5568,))) + # right amount of chunks, same shape + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 464), (5568,))) + def test_combine_area_extents(self): """Test combination of area extents.""" area1 = MagicMock()