From 217ba86a8f621768443965ec6ac119732db2fba0 Mon Sep 17 00:00:00 2001 From: Leonardo Uieda Date: Wed, 10 Oct 2018 22:25:42 -1000 Subject: [PATCH] Allow gridders to pass kwargs to coord generators (#144) Gridder methods `grid`, `scatter`, and `profile` now take in `**kwargs` and pass them along to the coordinate generation functions. This will allow extra flexibility for passing in arguments, like a grid height or configuration, without needing to keep the methods in sync with the functions. Removed the `coordinate_system` mechanics from `profile` because it wasn't implemented and is probably gonna take a while. Will introduce it back when we finally have a geographic coordinate gridder. This simplifies the coordinate naming schemes and other functions. This is a step towards #138 --- verde/base.py | 153 +++++++++++++------------------- verde/coordinates.py | 65 ++++---------- verde/tests/test_base.py | 31 +------ verde/tests/test_coordinates.py | 4 - 4 files changed, 85 insertions(+), 168 deletions(-) diff --git a/verde/base.py b/verde/base.py index 527fa6fc9..3fdfefd3a 100644 --- a/verde/base.py +++ b/verde/base.py @@ -38,13 +38,6 @@ class BaseGridder(BaseEstimator): * Estimated parameters should be stored as attributes with names ending in ``_``. - The child class can define the following attributes to control the names of - coordinates and how distances are calculated: - - * ``coordinate_system``: either ``'cartesian'`` or ``'geographic'``. Will - influence dimension names and distance calculations. Defaults to - ``'cartesian'``. - Examples -------- @@ -236,50 +229,38 @@ def grid( region=None, shape=None, spacing=None, - adjust="spacing", dims=None, data_names=None, projection=None, + **kwargs ): """ Interpolate the data onto a regular grid. - The grid can be specified by either the number of points in each - dimension (the *shape*) or by the grid node spacing. - - If the given region is not divisible by the desired spacing, either the - region or the spacing will have to be adjusted. By default, the spacing - will be rounded to the nearest multiple. Optionally, the East and North - boundaries of the region can be adjusted to fit the exact spacing - given. See :func:`verde.grid_coordinates` for more details. + The grid can be specified by either the number of points in each dimension (the + *shape*) or by the grid node spacing. See :func:`verde.grid_coordinates` for + details. Other arguments for :func:`verde.grid_coordinates` can be passed as + extra keyword arguments (``kwargs``) to this method. - If the interpolator collected the input data region, then it will be - used if ``region=None``. Otherwise, you must specify the grid region. + If the interpolator collected the input data region, then it will be used if + ``region=None``. Otherwise, you must specify the grid region. - Use the *dims* and *data_names* arguments to set custom names for the - dimensions and the data field(s) in the output :class:`xarray.Dataset`. - Default names are provided. + Use the *dims* and *data_names* arguments to set custom names for the dimensions + and the data field(s) in the output :class:`xarray.Dataset`. Default names will + be provided if none are given. Parameters ---------- region : list = [W, E, S, N] - The boundaries of a given region in Cartesian or geographic - coordinates. + The boundaries of a given region in Cartesian or geographic coordinates. shape : tuple = (n_north, n_east) or None The number of points in the South-North and West-East directions, - respectively. If *None* and *spacing* is not given, defaults to - ``(101, 101)``. - spacing : tuple = (s_north, s_east) or None - The grid spacing in the South-North and West-East directions, respectively. - adjust : {'spacing', 'region'} - Whether to adjust the spacing or the region if required. Ignored if - *shape* is given instead of *spacing*. Defaults to adjusting the - spacing. + spacing : tuple = (s_north, s_east) or None + The grid spacing in the South-North and West-East directions, respectively. dims : list or None The names of the northing and easting data dimensions, respectively, in the - output grid. Defaults to ``['northing', 'easting']`` for Cartesian grids and - ``['latitude', 'longitude']`` for geographic grids. **NOTE: This is an + output grid. Defaults to ``['northing', 'easting']``. **NOTE: This is an exception to the "easting" then "northing" pattern but is required for compatibility with xarray.** data_names : list of None @@ -300,19 +281,17 @@ def grid( Returns ------- grid : xarray.Dataset - The interpolated grid. Metadata about the interpolator is written - to the ``attrs`` attribute. + The interpolated grid. Metadata about the interpolator is written to the + ``attrs`` attribute. See also -------- verde.grid_coordinates : Generate the coordinate values for the grid. """ - dims = get_dims(self, dims) + dims = get_dims(dims) region = get_instance_region(self, region) - coordinates = grid_coordinates( - region, shape=shape, spacing=spacing, adjust=adjust - ) + coordinates = grid_coordinates(region, shape=shape, spacing=spacing, **kwargs) if projection is None: data = check_data(self.predict(coordinates)) else: @@ -333,16 +312,21 @@ def scatter( dims=None, data_names=None, projection=None, + **kwargs ): """ Interpolate values onto a random scatter of points. - If the interpolator collected the input data region, then it will be - used if ``region=None``. Otherwise, you must specify the grid region. + Point coordinates are generated by :func:`verde.scatter_points`. Other arguments + for this function can be passed as extra keyword arguments (``kwargs``) to this + method. - Use the *dims* and *data_names* arguments to set custom names for the - dimensions and the data field(s) in the output - :class:`pandas.DataFrame`. Default names are provided. + If the interpolator collected the input data region, then it will be used if + ``region=None``. Otherwise, you must specify the grid region. + + Use the *dims* and *data_names* arguments to set custom names for the dimensions + and the data field(s) in the output :class:`pandas.DataFrame`. Default names are + provided. Parameters ---------- @@ -358,8 +342,7 @@ def scatter( (resulting in different numbers with each run). dims : list or None The names of the northing and easting data dimensions, respectively, in the - output dataframe. Defaults to ``['northing', 'easting']`` for Cartesian - grids and ``['latitude', 'longitude']`` for geographic grids. **NOTE: This + output dataframe. Defaults to ``['northing', 'easting']``. **NOTE: This is an exception to the "easting" then "northing" pattern but is required for compatibility with xarray.** data_names : list of None @@ -383,9 +366,9 @@ def scatter( The interpolated values on a random set of points. """ - dims = get_dims(self, dims) + dims = get_dims(dims) region = get_instance_region(self, region) - coordinates = scatter_points(region, size, random_state) + coordinates = scatter_points(region, size, random_state=random_state, **kwargs) if projection is None: data = check_data(self.predict(coordinates)) else: @@ -396,35 +379,41 @@ def scatter( return pd.DataFrame(dict(columns), columns=[c[0] for c in columns]) def profile( - self, point1, point2, size, dims=None, data_names=None, projection=None + self, + point1, + point2, + size, + dims=None, + data_names=None, + projection=None, + **kwargs ): """ Interpolate data along a profile between two points. - Generates the profile using a straight line if the interpolator assumes - Cartesian data or a great circle if geographic data. + Generates the profile along a straight line assuming Cartesian distances. Point + coordinates are generated by :func:`verde.profile_coordinates`. Other arguments + for this function can be passed as extra keyword arguments (``kwargs``) to this + method. - Use the *dims* and *data_names* arguments to set custom names for the - dimensions and the data field(s) in the output - :class:`pandas.DataFrame`. Default names are provided. + Use the *dims* and *data_names* arguments to set custom names for the dimensions + and the data field(s) in the output :class:`pandas.DataFrame`. Default names are + provided. - Includes the calculated distance to *point1* for each data point in the - profile. + Includes the calculated Cartesian distance from *point1* for each data point in + the profile. Parameters ---------- point1 : tuple - The easting and northing coordinates, respectively, of the first - point. + The easting and northing coordinates, respectively, of the first point. point2 : tuple - The easting and northing coordinates, respectively, of the second - point. + The easting and northing coordinates, respectively, of the second point. size : int The number of points to generate. dims : list or None The names of the northing and easting data dimensions, respectively, in the - output dataframe. Defaults to ``['northing', 'easting']`` for Cartesian - grids and ``['latitude', 'longitude']`` for geographic grids. **NOTE: This + output dataframe. Defaults to ``['northing', 'easting']``. **NOTE: This is an exception to the "easting" then "northing" pattern but is required for compatibility with xarray.** data_names : list of None @@ -448,51 +437,37 @@ def profile( The interpolated values along the profile. """ - coordsys = getattr(self, "coordinate_system", "cartesian") - dims = get_dims(self, dims) - east, north, distances = profile_coordinates( - point1, point2, size, coordinate_system=coordsys - ) + dims = get_dims(dims) + coordinates, distances = profile_coordinates(point1, point2, size, **kwargs) if projection is None: - data = check_data(self.predict((east, north))) + data = check_data(self.predict(coordinates)) else: - data = check_data(self.predict(projection(east, north))) + data = check_data(self.predict(projection(*coordinates))) data_names = get_data_names(data, data_names) - columns = [(dims[0], north), (dims[1], east), ("distance", distances)] + columns = [ + (dims[0], coordinates[1]), + (dims[1], coordinates[0]), + ("distance", distances), + ] columns.extend(zip(data_names, data)) return pd.DataFrame(dict(columns), columns=[c[0] for c in columns]) -def get_dims(instance, dims): +def get_dims(dims): """ - Get default dimension names for an instance if not given as arguments. + Get default dimension names. Examples -------- - >>> grd = BaseGridder() - >>> get_dims(grd, dims=None) + >>> get_dims(dims=None) ('northing', 'easting') - >>> grd.coordinate_system = 'geographic' - >>> get_dims(grd, dims=None) - ('latitude', 'longitude') - >>> grd.coordinate_system = 'cartesian' - >>> get_dims(grd, dims=('john', 'paul')) + >>> get_dims(dims=('john', 'paul')) ('john', 'paul') """ if dims is not None: return dims - valid_coords = ["cartesian", "geographic"] - coords = getattr(instance, "coordinate_system", "cartesian") - if coords not in valid_coords: - raise ValueError( - "Invalid coordinate system for {}: '{}'. Must be one of {}.".format( - instance.__class__.__name__, coords, str(valid_coords) - ) - ) - if coords == "geographic": - return ("latitude", "longitude") return ("northing", "easting") diff --git a/verde/coordinates.py b/verde/coordinates.py index 3fb50b86b..40e3e41ca 100644 --- a/verde/coordinates.py +++ b/verde/coordinates.py @@ -439,12 +439,9 @@ def spacing_to_shape(region, spacing, adjust): return (nnorth, neast), (w, e, s, n) -def profile_coordinates(point1, point2, size, coordinate_system="cartesian"): +def profile_coordinates(point1, point2, size): """ - Coordinates for a profile along a line between two points. - - If on a geographic coordinate system, will calculate along a great circle. - Otherwise, will use a straight line. + Coordinates for a profile along a straight line between two points. Parameters ---------- @@ -456,20 +453,17 @@ def profile_coordinates(point1, point2, size, coordinate_system="cartesian"): second point, respectively. size : int Number of points to sample along the line. - coordinate_system : str - The coordinate system used to define the points and the line. Either - ``'cartesian'`` or ``'geographic'``. Returns ------- - easting, northing, distances : 1d arrays - The easting and northing coordinates of points along the straight line - and the distances from the first point. + coordinates, distances : tuple and 1d array + The coordinates of points along the straight line and the distances from the + first point. Examples -------- - >>> east, north, dist = profile_coordinates((1, 10), (1, 20), size=11) + >>> (east, north), dist = profile_coordinates((1, 10), (1, 20), size=11) >>> print('easting:', ', '.join('{:.1f}'.format(i) for i in east)) easting: 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 >>> print('northing:', ', '.join('{:.1f}'.format(i) for i in north)) @@ -483,29 +477,19 @@ def profile_coordinates(point1, point2, size, coordinate_system="cartesian"): grid_coordinates : Generate coordinates for each point on a regular grid """ - valid_coordinate_systems = ["cartesian", "geographic"] - if coordinate_system not in valid_coordinate_systems: - raise ValueError( - "Invalid coordinate system '{}'. Must be one of {}.".format( - coordinate_system, str(valid_coordinate_systems) - ) - ) if size <= 0: raise ValueError("Invalid profile size '{}'. Must be > 0.".format(size)) - if coordinate_system == "geographic": - raise NotImplementedError() - elif coordinate_system == "cartesian": - east1, north1 = point1 - east2, north2 = point2 - separation = np.sqrt((east1 - east2) ** 2 + (north1 - north2) ** 2) - distances = np.linspace(0, separation, size) - angle = np.arctan2(north2 - north1, east2 - east1) - easting = east1 + distances * np.cos(angle) - northing = north1 + distances * np.sin(angle) - return easting, northing, distances - - -def inside(coordinates, region, out=None, tmp=None): + east1, north1 = point1 + east2, north2 = point2 + separation = np.sqrt((east1 - east2) ** 2 + (north1 - north2) ** 2) + distances = np.linspace(0, separation, size) + angle = np.arctan2(north2 - north1, east2 - east1) + easting = east1 + distances * np.cos(angle) + northing = north1 + distances * np.sin(angle) + return (easting, northing), distances + + +def inside(coordinates, region): """ Determine which points fall inside a given region. @@ -520,15 +504,6 @@ def inside(coordinates, region, out=None, tmp=None): region : list = [W, E, S, N] The boundaries of a given region in Cartesian or geographic coordinates. - out : None or array of booleans - Numpy array to be used as output. The contents will be overwritten and - the same array will be returned. - tmp : None or tuple - Numpy arrays used to store the outputs of temporary logical operations. - Passing in pre-allocated arrays avoids the overhead of allocation when - calling this function repeatedly. If not None, then should be a tuple - of 4 numpy arrays of boolean type and a shape equal to or broadcast - from the input coordinates. Returns ------- @@ -564,10 +539,8 @@ def inside(coordinates, region, out=None, tmp=None): w, e, s, n = region easting, northing = coordinates[:2] # Allocate temporary arrays to minimize memory allocation overhead - if out is None: - out = np.empty_like(easting, dtype=np.bool) - if tmp is None: - tmp = tuple(np.empty_like(easting, dtype=np.bool) for i in range(4)) + out = np.empty_like(easting, dtype=np.bool) + tmp = tuple(np.empty_like(easting, dtype=np.bool) for i in range(4)) # Using the logical functions is a lot faster than & > < for some reason # Plus, this way avoids repeated allocation of intermediate arrays in_we = np.logical_and( diff --git a/verde/tests/test_base.py b/verde/tests/test_base.py index 9f30077bb..d1db6c26f 100644 --- a/verde/tests/test_base.py +++ b/verde/tests/test_base.py @@ -18,35 +18,8 @@ def test_get_dims(): "Tests that get_dims returns the expected results" - grd = BaseGridder() - assert get_dims(grd, dims=None) == ("northing", "easting") - assert get_dims(grd, dims=("john", "paul")) == ("john", "paul") - - grd.coordinate_system = "geographic" - assert get_dims(grd, dims=None) == ("latitude", "longitude") - assert get_dims(grd, dims=("john", "paul")) == ("john", "paul") - - grd.coordinate_system = "cartesian" - assert get_dims(grd, dims=None) == ("northing", "easting") - assert get_dims(grd, dims=("john", "paul")) == ("john", "paul") - - # Make sure the given dims is returned no matter what - grd.coordinate_system = "an invalid system" - assert get_dims(grd, dims=("john", "paul")) == ("john", "paul") - - -def test_get_dims_fails(): - "Check if fails for invalid coordinate system" - grd = BaseGridder() - with pytest.raises(ValueError): - grd.coordinate_system = "Cartesian" - get_dims(grd, dims=None) - with pytest.raises(ValueError): - grd.coordinate_system = "Geographic" - get_dims(grd, dims=None) - with pytest.raises(ValueError): - grd.coordinate_system = "some totally not valid name" - get_dims(grd, dims=None) + assert get_dims(dims=None) == ("northing", "easting") + assert get_dims(dims=("john", "paul")) == ("john", "paul") def test_get_data_names(): diff --git a/verde/tests/test_coordinates.py b/verde/tests/test_coordinates.py index ad1858273..d62b4e8ca 100644 --- a/verde/tests/test_coordinates.py +++ b/verde/tests/test_coordinates.py @@ -92,7 +92,3 @@ def test_profile_coordiantes_fails(): profile_coordinates((0, 1), (1, 2), size=0) with pytest.raises(ValueError): profile_coordinates((0, 1), (1, 2), size=-10) - with pytest.raises(ValueError): - profile_coordinates((0, 1), (1, 2), size=10, coordinate_system="meh") - with pytest.raises(NotImplementedError): - profile_coordinates((0, 1), (1, 2), size=10, coordinate_system="geographic")