From a5b37059f2dd41f0841dccf19411808583c0f0dd Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 14:30:29 -0400 Subject: [PATCH 1/9] WIP: minimal Datasource.get_source_data method and test. --- podpac/core/data/datasource.py | 6 ++++++ podpac/core/data/test/test_datasource.py | 18 ++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index c3d7adae..a2ba3470 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -281,6 +281,12 @@ def _get_data(self, rc, rci): # Methods # ------------------------------------------------------------------------------------------------------------------ + def get_source_data(self, bounds={}): + """""" + + coords = self.coordinates.select(bounds) + return self.eval(coords) + def eval(self, coordinates, **kwargs): """ Wraps the super Node.eval method in order to cache with the correct coordinates. diff --git a/podpac/core/data/test/test_datasource.py b/podpac/core/data/test/test_datasource.py index c63ee3d2..34cce015 100644 --- a/podpac/core/data/test/test_datasource.py +++ b/podpac/core/data/test/test_datasource.py @@ -564,6 +564,24 @@ def test_eval_get_cache_transform_crs(self): node.eval(node.coordinates.transform("EPSG:4326")) assert node._from_cache + def test_get_source_data(self): + node = podpac.data.Array( + source=np.ones((3, 4)), + coordinates=podpac.Coordinates([range(3), range(4)], ["lat", "lon"]), + ) + + data = node.get_source_data() + np.testing.assert_array_equal(data, node.source) + + def test_get_source_data_with_bounds(self): + node = podpac.data.Array( + source=np.ones((3, 4)), + coordinates=podpac.Coordinates([range(3), range(4)], ["lat", "lon"]), + ) + + data = node.get_source_data({"lon": (1.5, 4.5)}) + np.testing.assert_array_equal(data, node.source[:, 2:]) + class TestDataSourceWithMultipleOutputs(object): def test_evaluate_no_overlap_with_output_extract_output(self): From 
4d10d021bff26036fb3f5138caec6c697703d18b Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 14:36:29 -0400 Subject: [PATCH 2/9] WIP: use _get_data directly in Datasource.get_source_data instead of wrapping eval. --- podpac/core/data/datasource.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index a2ba3470..e9d3d9db 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -284,8 +284,8 @@ def _get_data(self, rc, rci): def get_source_data(self, bounds={}): """""" - coords = self.coordinates.select(bounds) - return self.eval(coords) + coords, I = self.coordinates.select(bounds, return_index=True) + return self._get_data(coords, I) def eval(self, coordinates, **kwargs): """ From 0154e9c26a12b5d792fd6f133ffe0276700f78dd Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 14:36:54 -0400 Subject: [PATCH 3/9] WIP: use _get_data directly in Datasource.get_source_data instead of wrapping eval. --- podpac/core/data/datasource.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index e9d3d9db..83c00362 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -282,7 +282,19 @@ def _get_data(self, rc, rci): # ------------------------------------------------------------------------------------------------------------------ def get_source_data(self, bounds={}): - """""" + """ + Get source data, without interpolation. + + Arguments + --------- + bounds : dict + Dictionary of bounds by dimension, optional. 
+ + Returns + ------- + data : UnitsDataArray + Source data + """ coords, I = self.coordinates.select(bounds, return_index=True) return self._get_data(coords, I) From 8d422da64df72ff6e60603ee4b637c23568b1b35 Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 15:03:10 -0400 Subject: [PATCH 4/9] WIP: add minimal get_source_data and tests for TileCompositor. --- .../compositor/test/test_tiled_compositor.py | 24 +++++++++++++++++++ podpac/core/compositor/tile_compositor.py | 22 +++++++++++++++++ 2 files changed, 46 insertions(+) diff --git a/podpac/core/compositor/test/test_tiled_compositor.py b/podpac/core/compositor/test/test_tiled_compositor.py index bf424caa..df91f764 100644 --- a/podpac/core/compositor/test/test_tiled_compositor.py +++ b/podpac/core/compositor/test/test_tiled_compositor.py @@ -47,3 +47,27 @@ def test_composition_stacked_multiindex_names(self): np.testing.assert_array_equal(output["lat"], [3, 4, 5, 6]) np.testing.assert_array_equal(output["lon"], [3, 4, 5, 6]) np.testing.assert_array_equal(output, [103, 104, 200, 201]) + + def test_get_source_data(self): + a = ArrayRaw(source=np.arange(5) + 100, coordinates=podpac.Coordinates([[0, 1, 2, 3, 4]], dims=["lat"])) + b = ArrayRaw(source=np.arange(5) + 200, coordinates=podpac.Coordinates([[5, 6, 7, 8, 9]], dims=["lat"])) + c = ArrayRaw(source=np.arange(5) + 300, coordinates=podpac.Coordinates([[10, 11, 12, 13, 14]], dims=["lat"])) + + node = TileCompositorRaw(sources=[a, b, c]) + + data = node.get_source_data() + np.testing.assert_array_equal(data["lat"], np.arange(15)) + np.testing.assert_array_equal(data, np.hstack([source.source for source in node.sources])) + + # with bounds + data = node.get_source_data({"lat": (2.5, 6.5)}) + np.testing.assert_array_equal(data["lat"], [3, 4, 5, 6]) + np.testing.assert_array_equal(data, [103, 104, 200, 201]) + + # error + with podpac.settings: + podpac.settings.set_unsafe_eval(True) + d = podpac.algorithm.Arithmetic(eqn="a+2", a=a) + node = 
TileCompositorRaw(sources=[a, b, c, d]) + with pytest.raises(ValueError, match="Cannot get composited source data"): + node.get_source_data() diff --git a/podpac/core/compositor/tile_compositor.py b/podpac/core/compositor/tile_compositor.py index 25462c9c..7ffbc84f 100644 --- a/podpac/core/compositor/tile_compositor.py +++ b/podpac/core/compositor/tile_compositor.py @@ -67,6 +67,28 @@ def composite(self, coordinates, data_arrays, result=None): return result return res + def get_source_data(self, bounds={}): + """ + Get composited source data, without interpolation. + + Arguments + --------- + bounds : dict + Dictionary of bounds by dimension, optional. + + Returns + ------- + data : UnitsDataArray + Source data + """ + + if any(not hasattr(source, "get_source_data") for source in self.sources): + raise ValueError("Cannot get composited source data; all sources must be a DataSource or TileCompositor") + + coords = None # n/a + source_data_arrays = (source.get_source_data(bounds) for source in self.sources) # generator + return self.composite(coords, source_data_arrays) + class TileCompositor(InterpolationMixin, TileCompositorRaw): pass From 2d6246f4bcdf3eed5d498ea8f97122cd604a85fb Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 15:21:50 -0400 Subject: [PATCH 5/9] DOC: Add get_source_data to the overview doc. --- doc/source/overview.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/overview.md b/doc/source/overview.md index 4022f125..1fff1801 100644 --- a/doc/source/overview.md +++ b/doc/source/overview.md @@ -59,6 +59,16 @@ node = podpac.datalib.TerrainTiles(tile_format='geotiff', zoom=8) # ... and more each release ``` +Retrieve the raw source data array at full/native resolution. + +```python +# retrieve full source data +node.get_source_data() + +# retrieve bounded source data +node.get_source_data(bounds={'lat': (40, 45), 'lon': (-75, -70)}) +``` + ## Coordinates Define geospatial and temporal dataset coordinates. 
From e66a67ce9cdf7f39490ccb455d4ea50103b71b2d Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 17:15:05 -0400 Subject: [PATCH 6/9] Update doc/source/overview.md Co-authored-by: mpu-creare --- doc/source/overview.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/overview.md b/doc/source/overview.md index 1fff1801..3dc919b5 100644 --- a/doc/source/overview.md +++ b/doc/source/overview.md @@ -59,7 +59,7 @@ node = podpac.datalib.TerrainTiles(tile_format='geotiff', zoom=8) # ... and more each release ``` -Retrieve the raw source data array at full/native resolution. +Retrieve the raw source data array at full/native resolution. **Note**: Some data sources are too large to fit in RAM, and calling this function can crash Python. ```python # retrieve full source data From 215a4c714da7c83b509640ccad41efbfb4923023 Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 2 Aug 2021 17:15:19 -0400 Subject: [PATCH 7/9] Update podpac/core/compositor/tile_compositor.py Co-authored-by: mpu-creare --- podpac/core/compositor/tile_compositor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/compositor/tile_compositor.py b/podpac/core/compositor/tile_compositor.py index 7ffbc84f..7eb9a4c7 100644 --- a/podpac/core/compositor/tile_compositor.py +++ b/podpac/core/compositor/tile_compositor.py @@ -83,7 +83,7 @@ def get_source_data(self, bounds={}): """ if any(not hasattr(source, "get_source_data") for source in self.sources): - raise ValueError("Cannot get composited source data; all sources must be a DataSource or TileCompositor") + raise ValueError("Cannot get composited source data; all sources must have `get_source_data` implemented (such as nodes derived from a DataSource or TileCompositor node).") coords = None # n/a source_data_arrays = (source.get_source_data(bounds) for source in self.sources) # generator From cc59d81167639c9acbebd5504230c076bab0b2e7 Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: 
Mon, 2 Aug 2021 17:18:41 -0400 Subject: [PATCH 8/9] WIP: Improve docstring for get_source_data method. --- podpac/core/compositor/tile_compositor.py | 5 ++++- podpac/core/data/datasource.py | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/podpac/core/compositor/tile_compositor.py b/podpac/core/compositor/tile_compositor.py index 7eb9a4c7..680911c8 100644 --- a/podpac/core/compositor/tile_compositor.py +++ b/podpac/core/compositor/tile_compositor.py @@ -75,6 +75,7 @@ def get_source_data(self, bounds={}): --------- bounds : dict Dictionary of bounds by dimension, optional. + Keys must be dimension names, and values are (min, max) tuples, e.g. ``{'lat': (10, 20)}``. Returns ------- @@ -83,7 +84,9 @@ def get_source_data(self, bounds={}): """ if any(not hasattr(source, "get_source_data") for source in self.sources): - raise ValueError("Cannot get composited source data; all sources must have `get_source_data` implemented (such as nodes derived from a DataSource or TileCompositor node).") + raise ValueError( + "Cannot get composited source data; all sources must have `get_source_data` implemented (such as nodes derived from a DataSource or TileCompositor node)." + ) coords = None # n/a source_data_arrays = (source.get_source_data(bounds) for source in self.sources) # generator diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 83c00362..b03d77ae 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -289,6 +289,7 @@ def get_source_data(self, bounds={}): --------- bounds : dict Dictionary of bounds by dimension, optional. + Keys must be dimension names, and values are (min, max) tuples, e.g. ``{'lat': (10, 20)}``. 
Returns ------- From 91be103de3b7fa4eb82206fb6d7be76c972e2594 Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Tue, 3 Aug 2021 09:06:17 -0400 Subject: [PATCH 9/9] WIP: User-friendly exception for OGR.get_source_data --- podpac/core/data/ogr.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/podpac/core/data/ogr.py b/podpac/core/data/ogr.py index d41a8e5c..33e33591 100644 --- a/podpac/core/data/ogr.py +++ b/podpac/core/data/ogr.py @@ -51,6 +51,26 @@ def extents(self): layer = self.datasource.GetLayerByName(self.layer) return layer.GetExtent() + def get_source_data(self, bounds={}): + """ + Raise a user-friendly exception when calling get_source_data for this node. + + Arguments + --------- + bounds : dict + Dictionary of bounds by dimension, optional. + Keys must be dimension names, and values are (min, max) tuples, e.g. ``{'lat': (10, 20)}``. + + Raises + ------ + AttributeError : Cannot get source data for OGR datasources + """ + + raise AttributeError( + "Cannot get source data for OGR datasources. " + "The source data is a vector-based shapefile without a native resolution." + ) + @common_doc(COMMON_NODE_DOC) def _eval(self, coordinates, output=None, _selector=None): if "lat" not in coordinates.udims or "lon" not in coordinates.udims: