From 1733dc1ff6099f4d7ef81250eab034c92113d1df Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Wed, 27 Nov 2019 13:16:56 -0500 Subject: [PATCH 1/3] FIX: Fixing time selection when the precision of the self and other are not the same. By default, the lowest precision should be used to determine if time data intersects. --- podpac/core/coordinates/coordinates1d.py | 15 +++++++++++++-- .../test/test_array_coordinates1d.py | 8 ++++++++ .../test/test_uniform_coordinates1d.py | 8 ++++++++ .../core/coordinates/uniform_coordinates1d.py | 19 +++++++++++++++---- 4 files changed, 44 insertions(+), 6 deletions(-) diff --git a/podpac/core/coordinates/coordinates1d.py b/podpac/core/coordinates/coordinates1d.py index 91abbfdea..6e614c3c1 100644 --- a/podpac/core/coordinates/coordinates1d.py +++ b/podpac/core/coordinates/coordinates1d.py @@ -356,13 +356,24 @@ def select(self, bounds, return_indices=False, outer=False): return self._select_full(return_indices) bounds = make_coord_value(bounds[0]), make_coord_value(bounds[1]) + my_bounds = self.area_bounds.copy() + + # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision + if self.name == "time" and isinstance(my_bounds[0], np.datetime64) and isinstance(bounds[0], np.datetime64): + mine = np.timedelta64(1, my_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + your = np.timedelta64(1, bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + + if mine > your: + bounds = [b.astype(my_bounds[0].dtype) for b in bounds] + else: + my_bounds = [b.astype(bounds[0].dtype) for b in my_bounds] # full - if self.bounds[0] >= bounds[0] and self.bounds[1] <= bounds[1]: + if my_bounds[0] >= bounds[0] and my_bounds[1] <= bounds[1]: return self._select_full(return_indices) # none - if self.area_bounds[0] > bounds[1] or self.area_bounds[1] < bounds[0]: + if my_bounds[0] > bounds[1] or my_bounds[1] < bounds[0]: return self._select_empty(return_indices) # partial, implemented in child classes diff --git a/podpac/core/coordinates/test/test_array_coordinates1d.py b/podpac/core/coordinates/test/test_array_coordinates1d.py index bc9df5093..4f824405d 100644 --- a/podpac/core/coordinates/test/test_array_coordinates1d.py +++ b/podpac/core/coordinates/test/test_array_coordinates1d.py @@ -1039,6 +1039,14 @@ def test_select_time(self): s = c.select({"time": [np.datetime64("2018-01-03"), "2018-02-06"]}) assert_equal(s.coordinates, np.array(["2018-01-03", "2018-01-04"]).astype(np.datetime64)) + def test_select_time_variable_precision(self): + c = ArrayCoordinates1d(["2012-05-19"], name="time") + c2 = ArrayCoordinates1d(["2012-05-19T12:00:00"], name="time") + s = c.select(c2.bounds) + s2 = c2.select(c.bounds) + assert s.size == 1 + assert s2.size == 1 + def test_select_dtype(self): c = ArrayCoordinates1d([20.0, 40.0, 60.0, 10.0, 90.0, 50.0], name="lat") with pytest.raises(TypeError): diff --git a/podpac/core/coordinates/test/test_uniform_coordinates1d.py b/podpac/core/coordinates/test/test_uniform_coordinates1d.py index 0cc323332..dbc8ebc8d 100644 --- a/podpac/core/coordinates/test/test_uniform_coordinates1d.py +++ b/podpac/core/coordinates/test/test_uniform_coordinates1d.py @@ -1180,3 +1180,11 @@ def test_select_outer(self): assert isinstance(s, ArrayCoordinates1d) assert_equal(s.coordinates, []) assert_equal(c.coordinates[I], []) + + def test_select_time_variable_precision(self): + c = UniformCoordinates1d("2012-05-19", "2012-05-20", "1,D", name="time") + c2 = UniformCoordinates1d("2012-05-20T12:00:00", "2012-05-21T12:00:00", "1,D", name="time") + s = c.select(c2.bounds) + s2 = c2.select(c.bounds) + assert s.size == 1 + assert s2.size == 1 diff --git a/podpac/core/coordinates/uniform_coordinates1d.py b/podpac/core/coordinates/uniform_coordinates1d.py index a4f3fce57..69e7b6cfc 100644 --- a/podpac/core/coordinates/uniform_coordinates1d.py +++ b/podpac/core/coordinates/uniform_coordinates1d.py @@ -384,12 +384,23 @@ def copy(self): def _select(self, bounds, return_indices, outer): # TODO is there an easier way to do this with the new outer flag? + my_bounds = self.bounds.copy() - lo = max(bounds[0], self.bounds[0]) - hi = min(bounds[1], self.bounds[1]) + # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision + if self.name == "time" and isinstance(my_bounds[0], np.datetime64) and isinstance(bounds[0], np.datetime64): + mine = np.timedelta64(1, my_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + your = np.timedelta64(1, bounds[0].dtype.name.replace("datetime64", "")[1:-1]) - fmin = (lo - self.bounds[0]) / np.abs(self.step) - fmax = (hi - self.bounds[0]) / np.abs(self.step) + if mine > your: + bounds = [b.astype(my_bounds[0].dtype) for b in bounds] + else: + my_bounds = [b.astype(bounds[0].dtype) for b in my_bounds] + + lo = max(bounds[0], my_bounds[0]) + hi = min(bounds[1], my_bounds[1]) + + fmin = (lo - my_bounds[0]) / np.abs(self.step) + fmax = (hi - my_bounds[0]) / np.abs(self.step) imin = int(np.ceil(fmin)) imax = int(np.floor(fmax)) From 8f2a0aab677739d00f1f8035e0f7de29ab9c63fd Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Mon, 2 Dec 2019 16:08:40 -0500 Subject: [PATCH 2/3] FIX: Updates based on review. * factored out the lowering of the precision to a function * Only do this selection when outer==True for higher precision other (compared to inner) * Check for self.dtype instead of self.name * Check input bound dtypes and throw TypeError if anything is not a np.datetime64 --- podpac/core/coordinates/coordinates1d.py | 13 +++---- .../test/test_array_coordinates1d.py | 4 +- .../test/test_uniform_coordinates1d.py | 4 +- .../core/coordinates/uniform_coordinates1d.py | 12 +++--- podpac/core/coordinates/utils.py | 38 +++++++++++++++++++ 5 files changed, 54 insertions(+), 17 deletions(-) diff --git a/podpac/core/coordinates/coordinates1d.py b/podpac/core/coordinates/coordinates1d.py index 6e614c3c1..8159332dd 100644 --- a/podpac/core/coordinates/coordinates1d.py +++ b/podpac/core/coordinates/coordinates1d.py @@ -12,7 +12,7 @@ from podpac.core.utils import ArrayTrait from podpac.core.coordinates.utils import make_coord_value, make_coord_delta, make_coord_delta_array -from podpac.core.coordinates.utils import add_coord, divide_delta +from podpac.core.coordinates.utils import add_coord, divide_delta, lower_precision_time_bounds from podpac.core.coordinates.utils import Dimension, CoordinateType from podpac.core.coordinates.base_coordinates import BaseCoordinates @@ -359,14 +359,11 @@ def select(self, bounds, return_indices=False, outer=False): my_bounds = self.area_bounds.copy() # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision - if self.name == "time" and isinstance(my_bounds[0], np.datetime64) and isinstance(bounds[0], np.datetime64): - mine = np.timedelta64(1, my_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) - your = np.timedelta64(1, bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + if self.dtype == np.datetime64: + if not isinstance(bounds[0], np.datetime64): + raise TypeError("Input bounds should be of type np.datetime64 when selecting data from:", str(self)) - if mine > your: - bounds = [b.astype(my_bounds[0].dtype) for b in bounds] - else: - my_bounds = [b.astype(bounds[0].dtype) for b in my_bounds] + my_bounds, bounds = lower_precision_time_bounds(my_bounds, bounds, outer) # full if my_bounds[0] >= bounds[0] and my_bounds[1] <= bounds[1]: diff --git a/podpac/core/coordinates/test/test_array_coordinates1d.py b/podpac/core/coordinates/test/test_array_coordinates1d.py index 4f824405d..2892232b4 100644 --- a/podpac/core/coordinates/test/test_array_coordinates1d.py +++ b/podpac/core/coordinates/test/test_array_coordinates1d.py @@ -1042,9 +1042,11 @@ def test_select_time(self): def test_select_time_variable_precision(self): c = ArrayCoordinates1d(["2012-05-19"], name="time") c2 = ArrayCoordinates1d(["2012-05-19T12:00:00"], name="time") - s = c.select(c2.bounds) + s = c.select(c2.bounds, outer=True) + s1 = c.select(c2.bounds, outer=False) s2 = c2.select(c.bounds) assert s.size == 1 + assert s1.size == 0 assert s2.size == 1 def test_select_dtype(self): diff --git a/podpac/core/coordinates/test/test_uniform_coordinates1d.py b/podpac/core/coordinates/test/test_uniform_coordinates1d.py index dbc8ebc8d..3e4ef61b4 100644 --- a/podpac/core/coordinates/test/test_uniform_coordinates1d.py +++ b/podpac/core/coordinates/test/test_uniform_coordinates1d.py @@ -1184,7 +1184,9 @@ def test_select_outer(self): def test_select_time_variable_precision(self): c = UniformCoordinates1d("2012-05-19", "2012-05-20", "1,D", name="time") c2 = UniformCoordinates1d("2012-05-20T12:00:00", "2012-05-21T12:00:00", "1,D", name="time") - s = c.select(c2.bounds) + s = c.select(c2.bounds, outer=True) + s1 = c.select(c2.bounds, outer=False) s2 = c2.select(c.bounds) assert s.size == 1 + assert s1.size == 0 assert s2.size == 1 diff --git a/podpac/core/coordinates/uniform_coordinates1d.py b/podpac/core/coordinates/uniform_coordinates1d.py index 69e7b6cfc..bc2286aa8 100644 --- a/podpac/core/coordinates/uniform_coordinates1d.py +++ b/podpac/core/coordinates/uniform_coordinates1d.py @@ -8,6 +8,7 @@ from collections import OrderedDict from podpac.core.coordinates.utils import make_coord_value, make_coord_delta, add_coord, divide_delta +from podpac.core.coordinates.utils import lower_precision_time_bounds from podpac.core.coordinates.coordinates1d import Coordinates1d from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d @@ -387,14 +388,11 @@ def _select(self, bounds, return_indices, outer): my_bounds = self.bounds.copy() # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision - if self.name == "time" and isinstance(my_bounds[0], np.datetime64) and isinstance(bounds[0], np.datetime64): - mine = np.timedelta64(1, my_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) - your = np.timedelta64(1, bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + if self.dtype == np.datetime64: + if not isinstance(bounds[0], np.datetime64): + raise TypeError("Input bounds should be of type np.datetime64 when selecting data from:", str(self)) - if mine > your: - bounds = [b.astype(my_bounds[0].dtype) for b in bounds] - else: - my_bounds = [b.astype(bounds[0].dtype) for b in my_bounds] + my_bounds, bounds = lower_precision_time_bounds(my_bounds, bounds, outer) lo = max(bounds[0], my_bounds[0]) hi = min(bounds[1], my_bounds[1]) diff --git a/podpac/core/coordinates/utils.py b/podpac/core/coordinates/utils.py index 4c3597afb..1c15b665d 100644 --- a/podpac/core/coordinates/utils.py +++ b/podpac/core/coordinates/utils.py @@ -529,3 +529,41 @@ def rem_vunits(crs): if "+vunits" in crs: crs = re.sub(r"\+vunits=[a-z\-]+", "", crs) return crs + + +def lower_precision_time_bounds(my_bounds, other_bounds, outer): + """ + When given two bounds of np.datetime64, this function will convert both bounds to the lower-precision (in terms of + time unit) numpy datetime4 object if outer==True, otherwise only my_bounds will be converted. + + Parameters + ----------- + my_bounds : List(np.datetime64) + The bounds of the native coordinates of the dataset + other_bounds : List(np.datetime64) + The bounds used for the selection + outer : bool + When the other_bounds are higher precision than the input_bounds, only convert these IF outer=True + + Returns + -------- + my_bounds : List(np.datetime64) + The bounds of the native coordinates of the dataset at the new precision + other_bounds : List(np.datetime64) + The bounds used for the selection at the new precision, if outer == True, otherwise return original coordinates + """ + if not isinstance(other_bounds[0], np.datetime64) or not isinstance(other_bounds[1], np.datetime64): + raise TypeError("Input bounds should be of type np.datetime64 when selecting data from:", str(my_bounds)) + + if not isinstance(my_bounds[0], np.datetime64) or not isinstance(my_bounds[1], np.datetime64): + raise TypeError("Native bounds should be of type np.datetime64 when selecting data using:", str(other_bounds)) + + mine = np.timedelta64(1, my_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + your = np.timedelta64(1, other_bounds[0].dtype.name.replace("datetime64", "")[1:-1]) + + if mine > your and outer: + other_bounds = [b.astype(my_bounds[0].dtype) for b in other_bounds] + else: + my_bounds = [b.astype(other_bounds[0].dtype) for b in my_bounds] + + return my_bounds, other_bounds From 6b22224ddce974f6a99e0d00758d9c974a286c30 Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Dec 2019 16:18:08 -0500 Subject: [PATCH 3/3] Always check the select bounds type. Adds a short-circuit if the coordinates are empty (and dtype is None), and removes the (redundant) bounds type checking in the private _select method. --- podpac/core/coordinates/coordinates1d.py | 18 +++++++++++++++--- .../core/coordinates/uniform_coordinates1d.py | 3 --- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/podpac/core/coordinates/coordinates1d.py b/podpac/core/coordinates/coordinates1d.py index 8159332dd..cecee4176 100644 --- a/podpac/core/coordinates/coordinates1d.py +++ b/podpac/core/coordinates/coordinates1d.py @@ -350,19 +350,31 @@ def select(self, bounds, return_indices=False, outer=False): index or slice for the selected coordinates (only if return_indices=True) """ + # empty case + if self.dtype is None: + return self._select_empty(return_indices) + if isinstance(bounds, dict): bounds = bounds.get(self.name) if bounds is None: return self._select_full(return_indices) bounds = make_coord_value(bounds[0]), make_coord_value(bounds[1]) + + # check type + if not isinstance(bounds[0], self.dtype): + raise TypeError( + "Input bounds do match the coordinates dtype (%s != %s)" % (type(self.bounds[0]), self.dtype) + ) + if not isinstance(bounds[1], self.dtype): + raise TypeError( + "Input bounds do match the coordinates dtype (%s != %s)" % (type(self.bounds[1]), self.dtype) + ) + my_bounds = self.area_bounds.copy() # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision if self.dtype == np.datetime64: - if not isinstance(bounds[0], np.datetime64): - raise TypeError("Input bounds should be of type np.datetime64 when selecting data from:", str(self)) - my_bounds, bounds = lower_precision_time_bounds(my_bounds, bounds, outer) # full diff --git a/podpac/core/coordinates/uniform_coordinates1d.py b/podpac/core/coordinates/uniform_coordinates1d.py index bc2286aa8..46ee1a36e 100644 --- a/podpac/core/coordinates/uniform_coordinates1d.py +++ b/podpac/core/coordinates/uniform_coordinates1d.py @@ -389,9 +389,6 @@ def _select(self, bounds, return_indices, outer): # If the bounds are of instance datetime64, then the comparison should happen at the lowest precision if self.dtype == np.datetime64: - if not isinstance(bounds[0], np.datetime64): - raise TypeError("Input bounds should be of type np.datetime64 when selecting data from:", str(self)) - my_bounds, bounds = lower_precision_time_bounds(my_bounds, bounds, outer) lo = max(bounds[0], my_bounds[0])