From 69ef210ed59a068bb5522c4ef3d688b4d3abd4a3 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Wed, 11 Mar 2020 17:30:15 +0000 Subject: [PATCH 1/4] Add function to split data on an expanding window For selecting data in windows of various size around a center point. Sizes do not have to be increasing necessarily. Returns only the indices of points falling inside each window. --- doc/api/index.rst | 3 +- verde/__init__.py | 1 + verde/base/utils.py | 24 +++++++- verde/coordinates.py | 121 ++++++++++++++++++++++++++++++++++++++- verde/tests/test_base.py | 16 +++++- 5 files changed, 160 insertions(+), 5 deletions(-) diff --git a/doc/api/index.rst b/doc/api/index.rst index 384d9750b..74c48c8c6 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -60,7 +60,8 @@ Coordinate Manipulation project_region inside block_split - rolling_window + rolling_window + expanding_split Utilities --------- diff --git a/verde/__init__.py b/verde/__init__.py index 7f565917f..5ded3bdbb 100644 --- a/verde/__init__.py +++ b/verde/__init__.py @@ -8,6 +8,7 @@ inside, block_split, rolling_window, + expanding_split, profile_coordinates, get_region, pad_region, diff --git a/verde/base/utils.py b/verde/base/utils.py index d35c216a8..4c44c4ca0 100644 --- a/verde/base/utils.py +++ b/verde/base/utils.py @@ -25,6 +25,21 @@ def check_data(data): return data +def check_coordinates(coordinates): + """ + Check that the given coordinate arrays are what we expect them to be. + Should be a tuple with arrays of the same shape. + """ + shapes = [coord.shape for coord in coordinates] + if not all(shape == shapes[0] for shape in shapes): + raise ValueError( + "Coordinate arrays must have the same shape. Coordinate shapes: {}".format( + shapes + ) + ) + return coordinates + + def check_fit_input(coordinates, data, weights, unpack=True): """ Validate the inputs to the fit method of gridders. @@ -59,8 +74,13 @@ def check_fit_input(coordinates, data, weights, unpack=True): """ data = check_data(data) weights = check_data(weights) - if any(i.shape != j.shape for i in coordinates for j in data): - raise ValueError("Coordinate and data arrays must have the same shape.") + coordinates = check_coordinates(coordinates) + if any(i.shape != coordinates[0].shape for i in data): + raise ValueError( + "Data arrays must have the same shape {} as coordinates. Data shapes: {}.".format( + coordinates[0].shape, [i.shape for i in data] + ) + ) if any(w is not None for w in weights): if len(weights) != len(data): raise ValueError( diff --git a/verde/coordinates.py b/verde/coordinates.py index 02532ddea..bbe2d562e 100644 --- a/verde/coordinates.py +++ b/verde/coordinates.py @@ -6,7 +6,7 @@ import numpy as np from sklearn.utils import check_random_state -from .base.utils import n_1d_arrays +from .base.utils import n_1d_arrays, check_coordinates from .utils import kdtree @@ -771,6 +771,7 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape= [6 6 6 7 7 7]] """ + coordinates = check_coordinates(coordinates) if region is None: region = get_region(coordinates) block_coords = grid_coordinates( @@ -1005,6 +1006,124 @@ def _check_rolling_window_overlap(region, size, shape, spacing): ) +def expanding_split(coordinates, center, sizes): + """ + Split the given points on an expanding window. + + Returns the indices of points falling inside a window of expanding size + centered on a given point. + + Parameters + ---------- + coordinates : tuple of arrays + Arrays with the coordinates of each data point. Should be in the + following order: (easting, northing, vertical, ...). + center : tuple + The coordinates of the center of the window. Should be in the + following order: (easting, northing, vertical, ...). + sizes : array + The sizes of the windows. Does not have to be in any particular order. + The order of indices returned will match the order of window sizes + given. Units should match the units of *coordinates* and *center*. + + Returns + ------- + indices : list + Each element of the list corresponds to a window. Each contains the + indices of points falling inside the respective window. Use them to + index the coordinates for each window. The indices will depend on the + number of dimensions in the input coordinates. For example, if the + coordinates are 2D arrays, each window will contain indices for 2 + dimensions (row, column). + + See also + -------- + block_split : Split a region into blocks and label points accordingly. + + Examples + -------- + + Generate a set of sample coordinates on a grid and determine the indices + of points for each expanding window: + + >>> from verde import grid_coordinates + >>> coords = grid_coordinates((-5, -1, 6, 10), spacing=1) + >>> print(coords[0]) + [[-5. -4. -3. -2. -1.] + [-5. -4. -3. -2. -1.] + [-5. -4. -3. -2. -1.] + [-5. -4. -3. -2. -1.] + [-5. -4. -3. -2. -1.]] + >>> print(coords[1]) + [[ 6. 6. 6. 6. 6.] + [ 7. 7. 7. 7. 7.] + [ 8. 8. 8. 8. 8.] + [ 9. 9. 9. 9. 9.] + [10. 10. 10. 10. 10.]] + >>> # Get the expanding window indices + >>> indices = expanding_split(coords, center=(-3, 8), sizes=[1, 2, 4]) + >>> # There is one index per window + >>> print(len(indices)) + 3 + >>> # The points in the first window. Indices are 2D positions because the + >>> # coordinate arrays are 2D. + >>> print(len(indices[0])) + 2 + >>> for dimension in indices[0]: + ... print(dimension) + [2] + [2] + >>> for dimension in indices[1]: + ... print(dimension) + [1 1 1 2 2 2 3 3 3] + [1 2 3 1 2 3 1 2 3] + >>> for dimension in indices[2]: + ... print(dimension) + [0 0 0 0 0 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3 4 4 4 4 4] + [0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 4] + >>> # To get the coordinates for each window, use indexing + >>> print(coords[0][indices[0]]) + [-3.] + >>> print(coords[1][indices[0]]) + [8.] + >>> print(coords[0][indices[1]]) + [-4. -3. -2. -4. -3. -2. -4. -3. -2.] + >>> print(coords[1][indices[1]]) + [7. 7. 7. 8. 8. 8. 9. 9. 9.] + + If the coordinates are 1D, the indices will also be 1D: + + >>> coords1d = [coord.ravel() for coord in coords] + >>> indices = expanding_split(coords1d, center=(-3, 8), sizes=[1, 2, 4]) + >>> print(len(indices)) + 3 + >>> # Since coordinates are 1D, there is only one index + >>> print(len(indices[0])) + 1 + >>> print(indices[0][0]) + [12] + >>> print(indices[1][0]) + [ 6 7 8 11 12 13 16 17 18] + >>> # The returned indices can be used in the same way as before + >>> print(coords1d[0][indices[0]]) + [-3.] + >>> print(coords1d[1][indices[0]]) + [8.] + + """ + coordinates = check_coordinates(coordinates) + shape = coordinates[0].shape + center = np.atleast_2d(center) + # pykdtree doesn't support query_ball_point yet and we need that + tree = kdtree(coordinates, use_pykdtree=False) + indices = [] + for size in sizes: + # Use p=inf (infinity norm) to get square windows instead of circular + index1d = tree.query_ball_point(center, r=size / 2, p=np.inf)[0] + indices.append(np.unravel_index(index1d, shape=shape)) + return indices + + def longitude_continuity(coordinates, region): """ Modify coordinates and region boundaries to ensure longitude continuity. diff --git a/verde/tests/test_base.py b/verde/tests/test_base.py index b46884b8f..184f65561 100644 --- a/verde/tests/test_base.py +++ b/verde/tests/test_base.py @@ -6,7 +6,7 @@ import numpy.testing as npt import pytest -from ..base.utils import check_fit_input +from ..base.utils import check_fit_input, check_coordinates from ..base.base_classes import ( BaseGridder, get_dims, @@ -16,6 +16,20 @@ from ..coordinates import grid_coordinates, scatter_points +def test_check_coordinates(): + "Should raise a ValueError is the coordinates have different shapes." + # Should not raise an error + check_coordinates([np.arange(10), np.arange(10)]) + check_coordinates([np.arange(10).reshape((5, 2)), np.arange(10).reshape((5, 2))]) + # Should raise an error + with pytest.raises(ValueError): + check_coordinates([np.arange(10), np.arange(10).reshape((5, 2))]) + with pytest.raises(ValueError): + check_coordinates( + [np.arange(10).reshape((2, 5)), np.arange(10).reshape((5, 2))] + ) + + def test_get_dims(): "Tests that get_dims returns the expected results" assert get_dims(dims=None) == ("northing", "easting") From d3fa7360de20a1cf38daa6f43cede219c55079bb Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Thu, 12 Mar 2020 17:36:30 +0000 Subject: [PATCH 2/4] Update the documetation and convert to int before unravel_index --- doc/api/index.rst | 2 +- verde/__init__.py | 2 +- verde/coordinates.py | 26 +++++++++++++------------- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/doc/api/index.rst b/doc/api/index.rst index 74c48c8c6..42fa05b29 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -61,7 +61,7 @@ Coordinate Manipulation inside block_split rolling_window - expanding_split + expanding_window Utilities --------- diff --git a/verde/__init__.py b/verde/__init__.py index 5ded3bdbb..b7df9b76a 100644 --- a/verde/__init__.py +++ b/verde/__init__.py @@ -8,7 +8,7 @@ inside, block_split, rolling_window, - expanding_split, + expanding_window, profile_coordinates, get_region, pad_region, diff --git a/verde/coordinates.py b/verde/coordinates.py index bbe2d562e..e39d26475 100644 --- a/verde/coordinates.py +++ b/verde/coordinates.py @@ -1006,12 +1006,11 @@ def _check_rolling_window_overlap(region, size, shape, spacing): ) -def expanding_split(coordinates, center, sizes): +def expanding_window(coordinates, center, sizes): """ - Split the given points on an expanding window. + Select points on windows of changing size around a center point. - Returns the indices of points falling inside a window of expanding size - centered on a given point. + Returns the indices of points falling inside each window. Parameters ---------- @@ -1029,12 +1028,11 @@ def expanding_split(coordinates, center, sizes): Returns ------- indices : list - Each element of the list corresponds to a window. Each contains the - indices of points falling inside the respective window. Use them to - index the coordinates for each window. The indices will depend on the - number of dimensions in the input coordinates. For example, if the - coordinates are 2D arrays, each window will contain indices for 2 - dimensions (row, column). + Each element of the list corresponds to the indices of points falling + inside a window. Use them to index the coordinates for each window. The + indices will depend on the number of dimensions in the input + coordinates. For example, if the coordinates are 2D arrays, each window + will contain indices for 2 dimensions (row, column). See also -------- @@ -1061,7 +1059,7 @@ def expanding_split(coordinates, center, sizes): [ 9. 9. 9. 9. 9.] [10. 10. 10. 10. 10.]] >>> # Get the expanding window indices - >>> indices = expanding_split(coords, center=(-3, 8), sizes=[1, 2, 4]) + >>> indices = expanding_window(coords, center=(-3, 8), sizes=[1, 2, 4]) >>> # There is one index per window >>> print(len(indices)) 3 @@ -1094,7 +1092,7 @@ def expanding_split(coordinates, center, sizes): If the coordinates are 1D, the indices will also be 1D: >>> coords1d = [coord.ravel() for coord in coords] - >>> indices = expanding_split(coords1d, center=(-3, 8), sizes=[1, 2, 4]) + >>> indices = expanding_window(coords1d, center=(-3, 8), sizes=[1, 2, 4]) >>> print(len(indices)) 3 >>> # Since coordinates are 1D, there is only one index @@ -1120,7 +1118,9 @@ def expanding_split(coordinates, center, sizes): for size in sizes: # Use p=inf (infinity norm) to get square windows instead of circular index1d = tree.query_ball_point(center, r=size / 2, p=np.inf)[0] - indices.append(np.unravel_index(index1d, shape=shape)) + # Convert indices to an array to avoid errors when the index is empty + # (no points in the window). unravel_index doesn't like empty lists. + indices.append(np.unravel_index(np.array(index1d, dtype="int"), shape=shape)) return indices From 27329d1ab4b7bee41dc1e9a1c1f1ecbd1eae208d Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Fri, 13 Mar 2020 09:53:44 +0000 Subject: [PATCH 3/4] Add see also links and use check_coordinates --- verde/coordinates.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/verde/coordinates.py b/verde/coordinates.py index e39d26475..d29d70eac 100644 --- a/verde/coordinates.py +++ b/verde/coordinates.py @@ -738,6 +738,7 @@ def block_split(coordinates, spacing=None, adjust="spacing", region=None, shape= -------- BlockReduce : Apply a reduction operation to the data in blocks (windows). rolling_window : Select points on a rolling (moving) window. + expanding_window : Select points on windows of changing size. Examples -------- @@ -836,6 +837,7 @@ def rolling_window( See also -------- block_split : Split a region into blocks and label points accordingly. + expanding_window : Select points on windows of changing size. Examples -------- @@ -944,13 +946,7 @@ def rolling_window( [6. 6. 6. 7. 7. 7. 8. 8. 8.] """ - shapes = [coord.shape for coord in coordinates] - if not all(shape == shapes[0] for shape in shapes): - raise ValueError( - "Coordinate arrays must have the same shape. Given shapes: {}".format( - shapes - ) - ) + coordinates = check_coordinates(coordinates) if region is None: region = get_region(coordinates) # Calculate the region spanning the centers of the rolling windows @@ -982,7 +978,8 @@ def rolling_window( # like empty lists but can handle empty integer arrays in case a window has # no points inside it. indices.ravel()[:] = [ - np.unravel_index(np.array(i, dtype="int"), shape=shapes[0]) for i in indices1d + np.unravel_index(np.array(i, dtype="int"), shape=coordinates[0].shape) + for i in indices1d ] return centers, indices @@ -1037,6 +1034,7 @@ def expanding_window(coordinates, center, sizes): See also -------- block_split : Split a region into blocks and label points accordingly. + rolling_window : Select points on a rolling (moving) window. Examples -------- From cb3cbff06e152d5224899bafa2b4787e405c72ce Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Fri, 13 Mar 2020 14:56:34 +0000 Subject: [PATCH 4/4] Fix identation in API docs Co-Authored-By: Santiago Soler --- doc/api/index.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/api/index.rst b/doc/api/index.rst index 42fa05b29..71a1b9c8c 100644 --- a/doc/api/index.rst +++ b/doc/api/index.rst @@ -60,7 +60,7 @@ Coordinate Manipulation project_region inside block_split - rolling_window + rolling_window expanding_window Utilities