From c6212df77039afc6aa3a64f99d32b204d975eb43 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 1 Dec 2021 14:50:03 -0600 Subject: [PATCH 1/5] Optimize AreaDefinition.get_proj_coords when requesting dask arrays --- pyresample/geometry.py | 88 +++++++++++++++++++++++++++++++----------- 1 file changed, 65 insertions(+), 23 deletions(-) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 158902581..f20c3bd29 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -1115,11 +1115,25 @@ def _extract_lons_lats(self, lonslats): return lons, lats -def invproj(data_x, data_y, proj_dict): +def _invproj(data_x, data_y, proj_dict): """Perform inverse projection.""" # XXX: does pyproj copy arrays? What can we do so it doesn't? target_proj = Proj(proj_dict) - return np.dstack(target_proj(data_x, data_y, inverse=True)) + lon, lat = target_proj(data_x, data_y, inverse=True) + return np.stack([lon.astype(data_x.dtype), lat.astype(data_y.dtype)]) + + +def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_upper_left_y, block_info=None): + start_y_idx = block_info[None]["array-location"][1][0] + end_y_idx = block_info[None]["array-location"][1][1] + start_x_idx = block_info[None]["array-location"][2][0] + end_x_idx = block_info[None]["array-location"][2][1] + dtype = block_info[None]["dtype"] + x = np.arange(start_x_idx, end_x_idx, dtype=dtype) * pixel_size_x + pixel_upper_left_x + y = np.arange(start_y_idx, end_y_idx, dtype=dtype) * -pixel_size_y + pixel_upper_left_y + x_2d, y_2d = np.meshgrid(x, y) + res = np.stack([x_2d, y_2d]) + return res class _ProjectionDefinition(BaseDefinition): @@ -2040,15 +2054,10 @@ def get_lonlat(self, row, col): @staticmethod def _do_rotation(xspan, yspan, rot_deg=0): """Apply a rotation factor to a matrix of points.""" - if hasattr(xspan, 'chunks'): - # we were given dask arrays, use dask functions - import dask.array as numpy - else: - numpy = np - rot_rad = numpy.radians(rot_deg) - rot_mat = numpy.array([[np.cos(rot_rad), np.sin(rot_rad)], [-np.sin(rot_rad), np.cos(rot_rad)]]) - x, y = numpy.meshgrid(xspan, yspan) - return numpy.einsum('ji, mni -> jmn', rot_mat, numpy.dstack([x, y])) + rot_rad = np.radians(rot_deg) + rot_mat = np.array([[np.cos(rot_rad), np.sin(rot_rad)], [-np.sin(rot_rad), np.cos(rot_rad)]]) + x, y = np.meshgrid(xspan, yspan) + return np.einsum('ji, mni -> jmn', rot_mat, np.dstack([x, y])) def get_proj_vectors_dask(self, chunks=None, dtype=None): """Get projection vectors.""" @@ -2058,17 +2067,21 @@ def get_proj_vectors_dask(self, chunks=None, dtype=None): chunks = CHUNK_SIZE # FUTURE: Use a global config object instead return self.get_proj_vectors(dtype=dtype, chunks=chunks) - def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): - """Get 1D projection coordinates.""" - x_kwargs = {} - y_kwargs = {} - + @staticmethod + def _chunks_to_yx_chunks(chunks): if chunks is not None and not isinstance(chunks, int): y_chunks = chunks[0] x_chunks = chunks[1] else: y_chunks = x_chunks = chunks + return y_chunks, x_chunks + def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): + """Get 1D projection coordinates.""" + x_kwargs = {} + y_kwargs = {} + + y_chunks, x_chunks = self._chunks_to_xy_chunks(chunks) if x_chunks is not None or y_chunks is not None: # use dask functions instead of numpy from dask.array import arange @@ -2139,6 +2152,11 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): Removed 'cache' keyword argument and add 'chunks' for creating dask arrays. """ + if self.rotation != 0 and chunks is not None: + raise ValueError("'rotation' is not supported with dask operations.") + if chunks is not None: + return self._proj_coords_dask(chunks, dtype) + target_x, target_y = self._get_proj_vectors(dtype=dtype, check_rotation=False, chunks=chunks) if data_slice is not None and isinstance(data_slice, slice): target_y = target_y[data_slice] @@ -2149,14 +2167,33 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): if self.rotation != 0: res = self._do_rotation(target_x, target_y, self.rotation) target_x, target_y = res[0, :, :], res[1, :, :] - elif chunks is not None: - import dask.array as da - target_x, target_y = da.meshgrid(target_x, target_y) else: target_x, target_y = np.meshgrid(target_x, target_y) return target_x, target_y + def _proj_coords_dask(self, chunks, dtype): + """Generate 2D x and y coordinate arrays. + + This is a separate function because it allows dask to optimize and + separate the individual 2D chunks of coordinates. Using the basic + numpy form of these calculations produces an unnecessary + relationship between the "arange" 1D projection vectors and every + 2D coordinate chunk. This makes it difficult for dask to schedule + 2D chunks in an optimal way. + + """ + y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) + res = da.map_blocks(_generate_2d_coords, + self.pixel_size_x, self.pixel_size_y, + self.pixel_upper_left[0], self.pixel_upper_left[1], + chunks=(2,) + (y_chunks, x_chunks), + meta=np.array((), dtype=dtype), + dtype=dtype, + ) + target_x, target_y = res[0], res[1] + return target_x, target_y + @property def projection_x_coords(self): """Return projection X coordinates.""" @@ -2245,10 +2282,15 @@ def get_lonlats(self, nprocs=None, data_slice=None, cache=False, dtype=None, chu if hasattr(target_x, 'chunks'): # we are using dask arrays, map blocks to th from dask.array import map_blocks - res = map_blocks(invproj, target_x, target_y, - chunks=(target_x.chunks[0], target_x.chunks[1], 2), - new_axis=[2], proj_dict=self.crs_wkt).astype(dtype) - return res[:, :, 0], res[:, :, 1] + res = map_blocks(_invproj, target_x, target_y, + chunks=(2,) + target_x.chunks, + meta=np.array((), dtype=target_x.dtype), + dtype=target_x.dtype, + new_axis=[0], proj_dict=self.crs_wkt) + lons, lats = res[0], res[1] + self.lons = lons + self.lats = lats + return lons, lats if nprocs > 1: target_proj = Proj_MP(self.crs) From b4339d76c9be754619e278819410d4468d795acf Mon Sep 17 00:00:00 2001 From: David Hoese Date: Wed, 1 Dec 2021 20:12:15 -0600 Subject: [PATCH 2/5] Fix various issues introduced by get_proj_coords refactor --- pyresample/geometry.py | 34 +++++++++++++++++++++----------- pyresample/test/test_geometry.py | 18 +++++++++-------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index f20c3bd29..40d3132b0 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -2081,7 +2081,7 @@ def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): x_kwargs = {} y_kwargs = {} - y_chunks, x_chunks = self._chunks_to_xy_chunks(chunks) + y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) if x_chunks is not None or y_chunks is not None: # use dask functions instead of numpy from dask.array import arange @@ -2154,15 +2154,19 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): """ if self.rotation != 0 and chunks is not None: raise ValueError("'rotation' is not supported with dask operations.") + y_slice, x_slice = self._get_yx_data_slice(data_slice) if chunks is not None: - return self._proj_coords_dask(chunks, dtype) + target_x, target_y = self._proj_coords_dask(chunks, dtype) + if y_slice is not None: + target_x = target_x[y_slice, x_slice] + target_y = target_y[y_slice, x_slice] + return target_x, target_y target_x, target_y = self._get_proj_vectors(dtype=dtype, check_rotation=False, chunks=chunks) - if data_slice is not None and isinstance(data_slice, slice): - target_y = target_y[data_slice] - elif data_slice is not None: - target_y = target_y[data_slice[0]] - target_x = target_x[data_slice[1]] + if y_slice is not None: + target_y = target_y[y_slice] + if x_slice is not None: + target_x = target_x[x_slice] if self.rotation != 0: res = self._do_rotation(target_x, target_y, self.rotation) @@ -2172,6 +2176,14 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): return target_x, target_y + @staticmethod + def _get_yx_data_slice(data_slice): + if data_slice is not None and isinstance(data_slice, slice): + return data_slice, slice(None, None, None) + elif data_slice is not None: + return data_slice[0], data_slice[1] + return None, None + def _proj_coords_dask(self, chunks, dtype): """Generate 2D x and y coordinate arrays. @@ -2184,10 +2196,11 @@ def _proj_coords_dask(self, chunks, dtype): """ y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) + norm_y_chunks, norm_x_chunks = da.core.normalize_chunks((y_chunks, x_chunks), self.shape, dtype=dtype) res = da.map_blocks(_generate_2d_coords, self.pixel_size_x, self.pixel_size_y, self.pixel_upper_left[0], self.pixel_upper_left[1], - chunks=(2,) + (y_chunks, x_chunks), + chunks=(2, norm_y_chunks, norm_x_chunks), meta=np.array((), dtype=dtype), dtype=dtype, ) @@ -2244,7 +2257,8 @@ def get_lonlats(self, nprocs=None, data_slice=None, cache=False, dtype=None, chu data_slice : slice object, optional Calculate only coordinates for specified slice cache : bool, optional - Store result the result. Requires data_slice to be None + Store the result internally for later reuse. Requires data_slice + to be None. dtype : numpy.dtype, optional Data type of the returned arrays chunks: int or tuple, optional @@ -2288,8 +2302,6 @@ def get_lonlats(self, nprocs=None, data_slice=None, cache=False, dtype=None, chu dtype=target_x.dtype, new_axis=[0], proj_dict=self.crs_wkt) lons, lats = res[0], res[1] - self.lons = lons - self.lats = lats return lons, lats if nprocs > 1: diff --git a/pyresample/test/test_geometry.py b/pyresample/test/test_geometry.py index 46e402ac4..5b632a0aa 100644 --- a/pyresample/test/test_geometry.py +++ b/pyresample/test/test_geometry.py @@ -2048,19 +2048,21 @@ def test_get_lonlats(self): np.testing.assert_allclose(lats[464:, :], lats1) # check that get_lonlats with chunks definition doesn't cause errors and output arrays are equal - # too many chunks - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((200, 264, 464), (5570,))) - # too few chunks - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464,), (5568,))) - # right amount of chunks, same shape - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 464), (5568,))) + with pytest.raises(ValueError): + # too many chunks + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((200, 264, 464), (5570,))) # right amount of chunks, different shape _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 470), (5568,))) - # only one set of chunks in a tuple - _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=(464, 5568)) # only one chunk value _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=464) + # only one set of chunks in a tuple + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=(464, 5568)) + # too few chunks + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464,), (5568,))) + # right amount of chunks, same shape + _check_final_area_lon_lat_with_chunks(final_area, lons, lats, chunks=((464, 464), (5568,))) + def test_combine_area_extents(self): """Test combination of area extents.""" area1 = MagicMock() From 87bd5faf7f56fcdba3484c7f2182e387deecaf69 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Thu, 2 Dec 2021 20:56:46 -0600 Subject: [PATCH 3/5] Fix get_proj_coords not producing unique dask names --- pyresample/geometry.py | 11 +++++++++-- pyresample/test/test_geometry.py | 5 +++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 40d3132b0..780fa5d8e 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -1123,7 +1123,8 @@ def _invproj(data_x, data_y, proj_dict): return np.stack([lon.astype(data_x.dtype), lat.astype(data_y.dtype)]) -def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_upper_left_y, block_info=None): +def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_upper_left_y, + chunks, dtype, block_info=None): start_y_idx = block_info[None]["array-location"][1][0] end_y_idx = block_info[None]["array-location"][1][1] start_x_idx = block_info[None]["array-location"][2][0] @@ -2154,6 +2155,8 @@ def get_proj_coords(self, data_slice=None, dtype=None, chunks=None): """ if self.rotation != 0 and chunks is not None: raise ValueError("'rotation' is not supported with dask operations.") + if dtype is None: + dtype = self.dtype y_slice, x_slice = self._get_yx_data_slice(data_slice) if chunks is not None: target_x, target_y = self._proj_coords_dask(chunks, dtype) @@ -2197,10 +2200,14 @@ def _proj_coords_dask(self, chunks, dtype): """ y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) norm_y_chunks, norm_x_chunks = da.core.normalize_chunks((y_chunks, x_chunks), self.shape, dtype=dtype) + # We must provide `chunks` and `dtype` as passed arguments to ensure + # the returned array has a unique dask name + # See: https://github.com/dask/dask/issues/8450 res = da.map_blocks(_generate_2d_coords, self.pixel_size_x, self.pixel_size_y, self.pixel_upper_left[0], self.pixel_upper_left[1], - chunks=(2, norm_y_chunks, norm_x_chunks), + chunks, dtype, + chunks=((2,), norm_y_chunks, norm_x_chunks), meta=np.array((), dtype=dtype), dtype=dtype, ) diff --git a/pyresample/test/test_geometry.py b/pyresample/test/test_geometry.py index 5b632a0aa..b3d0b9202 100644 --- a/pyresample/test/test_geometry.py +++ b/pyresample/test/test_geometry.py @@ -982,6 +982,11 @@ def test_get_proj_coords_dask(self): area_def = get_area_def(area_id, area_name, proj_id, proj_dict, x_size, y_size, area_extent) xcoord, ycoord = area_def.get_proj_coords(chunks=4096) + # make sure different chunk size provides a different dask name + xcoord2, ycoord2 = area_def.get_proj_coords(chunks=2048) + assert xcoord2.name != xcoord.name + assert ycoord2.name != ycoord.name + xcoord = xcoord.compute() ycoord = ycoord.compute() self.assertTrue(np.allclose(xcoord[0, :], From 6502b08c5070e3db90c2386643d506896d67f865 Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 3 Dec 2021 09:56:29 -0600 Subject: [PATCH 4/5] Refactor get_proj_vectors to reduce duplicate code --- pyresample/geometry.py | 78 ++++++++++++++++++++++++++---------------- 1 file changed, 49 insertions(+), 29 deletions(-) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index 780fa5d8e..ece6b02a8 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -1130,13 +1130,53 @@ def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_up start_x_idx = block_info[None]["array-location"][2][0] end_x_idx = block_info[None]["array-location"][2][1] dtype = block_info[None]["dtype"] - x = np.arange(start_x_idx, end_x_idx, dtype=dtype) * pixel_size_x + pixel_upper_left_x - y = np.arange(start_y_idx, end_y_idx, dtype=dtype) * -pixel_size_y + pixel_upper_left_y + x, y = _generate_1d_proj_vectors(start_x_idx, end_x_idx, + start_y_idx, end_y_idx, + pixel_size_x, pixel_size_y, + pixel_upper_left_x, pixel_upper_left_y, + dtype) x_2d, y_2d = np.meshgrid(x, y) res = np.stack([x_2d, y_2d]) return res +def _generate_1d_proj_vectors(start_x, end_x, + start_y, end_y, + pixel_size_x, pixel_size_y, + upper_left_x, upper_left_y, + dtype, chunks=None): + x_kwargs, y_kwargs, arange = _get_vector_arange_args(dtype, chunks) + x = arange(start_x, end_x, **x_kwargs) * pixel_size_x + upper_left_x + y = arange(start_y, end_y, **y_kwargs) * -pixel_size_y + upper_left_y + return x, y + + +def _get_vector_arange_args(dtype, chunks): + x_kwargs = {} + y_kwargs = {} + + y_chunks, x_chunks = _chunks_to_yx_chunks(chunks) + if x_chunks is not None or y_chunks is not None: + # use dask functions instead of numpy + from dask.array import arange + x_kwargs = {'chunks': x_chunks} + y_kwargs = {'chunks': y_chunks} + else: + arange = np.arange + x_kwargs['dtype'] = dtype + y_kwargs['dtype'] = dtype + return x_kwargs, y_kwargs, arange + + +def _chunks_to_yx_chunks(chunks): + if chunks is not None and not isinstance(chunks, int): + y_chunks = chunks[0] + x_chunks = chunks[1] + else: + y_chunks = x_chunks = chunks + return y_chunks, x_chunks + + class _ProjectionDefinition(BaseDefinition): """Base class for definitions based on CRS and area extents.""" @@ -2068,38 +2108,18 @@ def get_proj_vectors_dask(self, chunks=None, dtype=None): chunks = CHUNK_SIZE # FUTURE: Use a global config object instead return self.get_proj_vectors(dtype=dtype, chunks=chunks) - @staticmethod - def _chunks_to_yx_chunks(chunks): - if chunks is not None and not isinstance(chunks, int): - y_chunks = chunks[0] - x_chunks = chunks[1] - else: - y_chunks = x_chunks = chunks - return y_chunks, x_chunks - def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): """Get 1D projection coordinates.""" - x_kwargs = {} - y_kwargs = {} - - y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) - if x_chunks is not None or y_chunks is not None: - # use dask functions instead of numpy - from dask.array import arange - x_kwargs = {'chunks': x_chunks} - y_kwargs = {'chunks': y_chunks} - else: - arange = np.arange if check_rotation and self.rotation != 0: warnings.warn("Projection vectors will not be accurate because rotation is not 0", RuntimeWarning) if dtype is None: dtype = self.dtype - x_kwargs['dtype'] = dtype - y_kwargs['dtype'] = dtype - - target_x = arange(self.width, **x_kwargs) * self.pixel_size_x + self.pixel_upper_left[0] - target_y = arange(self.height, **y_kwargs) * -self.pixel_size_y + self.pixel_upper_left[1] - return target_x, target_y + x, y = _generate_1d_proj_vectors(0, self.width, + 0, self.height, + self.pixel_size_x, self.pixel_size_y, + self.pixel_upper_left[0], self.pixel_upper_left[1], + dtype, chunks=chunks) + return x, y def get_proj_vectors(self, dtype=None, chunks=None): """Calculate 1D projection coordinates for the X and Y dimension. @@ -2198,7 +2218,7 @@ def _proj_coords_dask(self, chunks, dtype): 2D chunks in an optimal way. """ - y_chunks, x_chunks = self._chunks_to_yx_chunks(chunks) + y_chunks, x_chunks = _chunks_to_yx_chunks(chunks) norm_y_chunks, norm_x_chunks = da.core.normalize_chunks((y_chunks, x_chunks), self.shape, dtype=dtype) # We must provide `chunks` and `dtype` as passed arguments to ensure # the returned array has a unique dask name From 17f0884dbfec049a6f4783c4cbe9629f7ae09bff Mon Sep 17 00:00:00 2001 From: David Hoese Date: Fri, 3 Dec 2021 10:14:42 -0600 Subject: [PATCH 5/5] More vector refactoring --- pyresample/geometry.py | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/pyresample/geometry.py b/pyresample/geometry.py index ece6b02a8..07dddc79e 100644 --- a/pyresample/geometry.py +++ b/pyresample/geometry.py @@ -1130,24 +1130,22 @@ def _generate_2d_coords(pixel_size_x, pixel_size_y, pixel_upper_left_x, pixel_up start_x_idx = block_info[None]["array-location"][2][0] end_x_idx = block_info[None]["array-location"][2][1] dtype = block_info[None]["dtype"] - x, y = _generate_1d_proj_vectors(start_x_idx, end_x_idx, - start_y_idx, end_y_idx, - pixel_size_x, pixel_size_y, - pixel_upper_left_x, pixel_upper_left_y, + x, y = _generate_1d_proj_vectors((start_x_idx, end_x_idx), + (start_y_idx, end_y_idx), + (pixel_size_x, pixel_size_y), + (pixel_upper_left_x, pixel_upper_left_y), dtype) x_2d, y_2d = np.meshgrid(x, y) res = np.stack([x_2d, y_2d]) return res -def _generate_1d_proj_vectors(start_x, end_x, - start_y, end_y, - pixel_size_x, pixel_size_y, - upper_left_x, upper_left_y, +def _generate_1d_proj_vectors(col_range, row_range, + pixel_size_xy, offset_xy, dtype, chunks=None): x_kwargs, y_kwargs, arange = _get_vector_arange_args(dtype, chunks) - x = arange(start_x, end_x, **x_kwargs) * pixel_size_x + upper_left_x - y = arange(start_y, end_y, **y_kwargs) * -pixel_size_y + upper_left_y + x = arange(*col_range, **x_kwargs) * pixel_size_xy[0] + offset_xy[0] + y = arange(*row_range, **y_kwargs) * -pixel_size_xy[1] + offset_xy[1] return x, y @@ -2114,10 +2112,10 @@ def _get_proj_vectors(self, dtype=None, check_rotation=True, chunks=None): warnings.warn("Projection vectors will not be accurate because rotation is not 0", RuntimeWarning) if dtype is None: dtype = self.dtype - x, y = _generate_1d_proj_vectors(0, self.width, - 0, self.height, - self.pixel_size_x, self.pixel_size_y, - self.pixel_upper_left[0], self.pixel_upper_left[1], + x, y = _generate_1d_proj_vectors((0, self.width), + (0, self.height), + (self.pixel_size_x, self.pixel_size_y), + (self.pixel_upper_left[0], self.pixel_upper_left[1]), dtype, chunks=chunks) return x, y