From c902a6be18decfcd24d185e78cc4873a17b1d5cf Mon Sep 17 00:00:00 2001 From: Jeffrey Milloy Date: Mon, 26 Nov 2018 09:55:02 -0500 Subject: [PATCH 01/25] initial node_eval / node output caching decorator --- podpac/core/algorithm/algorithm.py | 16 ++++------ podpac/core/algorithm/coord_select.py | 32 ++++++++----------- podpac/core/algorithm/signal.py | 46 ++++++++++----------------- podpac/core/algorithm/stats.py | 11 +++---- podpac/core/compositor.py | 5 +-- podpac/core/data/datasource.py | 16 +++------- podpac/core/data/types.py | 8 ++--- podpac/core/node.py | 46 ++++++++++++++++++++++++++- 8 files changed, 97 insertions(+), 83 deletions(-) diff --git a/podpac/core/algorithm/algorithm.py b/podpac/core/algorithm/algorithm.py index 505d484d8..fec1f2bde 100644 --- a/podpac/core/algorithm/algorithm.py +++ b/podpac/core/algorithm/algorithm.py @@ -56,6 +56,7 @@ def _inputs(self): } @common_doc(COMMON_DOC) + @node_eval def eval(self, coordinates, output=None, method=None): """Evalutes this nodes using the supplied coordinates. 
@@ -73,8 +74,6 @@ def eval(self, coordinates, output=None, method=None): {eval_return} """ - self._requested_coordinates = coordinates - # evaluate input nodes and keep outputs in self.outputs self.outputs = {} for key, node in self._inputs.items(): @@ -87,16 +86,15 @@ def eval(self, coordinates, output=None, method=None): result = self.algorithm() if isinstance(result, np.ndarray): if output is None: - output = self.create_output_array(output_coordinates) - output.data[:] = result + output = self.create_output_array(output_coordinates, data=result) + else: + output.data[:] = result else: if output is None: - output_coordinates = Coordinates.from_xarray(result.coords) - output = self.create_output_array(output_coordinates) - output[:] = result - output = output.transpose(*[dim for dim in coordinates.dims if dim in result.dims]) + output = result + else: + output[:] = result - self._output = output return output def find_coordinates(self): diff --git a/podpac/core/algorithm/coord_select.py b/podpac/core/algorithm/coord_select.py index 867d36117..0bcbbcee1 100644 --- a/podpac/core/algorithm/coord_select.py +++ b/podpac/core/algorithm/coord_select.py @@ -42,16 +42,6 @@ class ModifyCoordinates(Algorithm): @tl.default('coordinates_source') def _default_coordinates_source(self): return self.source - - def algorithm(self): - """Passthrough of the source data - - Returns - ------- - UnitDataArray - Source evaluated at the expanded coordinates - """ - return self.outputs['source'] @common_doc(COMMON_DOC) def eval(self, coordinates, output=None, method=None): @@ -76,23 +66,27 @@ def eval(self, coordinates, output=None, method=None): """ self._requested_coordinates = coordinates - - modified_coordinates = Coordinates( + self.outputs = {} + self._modified_coordinates = Coordinates( [self.get_modified_coordinates1d(coordinates, dim) for dim in coordinates.dims]) - for dim in modified_coordinates.udims: - if modified_coordinates[dim].size == 0: + + for dim in 
self._modified_coordinates.udims: + if self._modified_coordinates[dim].size == 0: raise ValueError("Modified coordinates do not intersect with source data (dim '%s')" % dim) - output = super(ModifyCoordinates, self).eval(modified_coordinates, output=output, method=method) - # debugging - self._modified_coordinates = modified_coordinates - self._output = output + self.outputs['source'] = self.source.eval(self._modified_coordinates, output=output, method=method) + + if output = None: + output = self.outputs['source'] + else: + output[:] = self.outputs['source'] + self._output = output return output class ExpandCoordinates(ModifyCoordinates): """Algorithm node used to expand requested coordinates. This is normally used in conjunction with a reduce operation - to calculate, for example, the average temperature over the last month. While this is simple to do when evaluating + to calculate, for example, the average temperature over the last month. While this is simple to do when uating a single node (just provide the coordinates), this functionality is needed for nodes buried deeper in a pipeline. lat, lon, time, alt : List diff --git a/podpac/core/algorithm/signal.py b/podpac/core/algorithm/signal.py index d1eaf2e93..cdd440f35 100644 --- a/podpac/core/algorithm/signal.py +++ b/podpac/core/algorithm/signal.py @@ -75,6 +75,7 @@ class Convolution(Algorithm): _full_kernel = tl.Instance(np.ndarray) @common_doc(COMMON_DOC) + @cache_output def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. 
@@ -91,20 +92,18 @@ def eval(self, coordinates, output=None, method=None): ------- {eval_return} """ - self._requested_coordinates = coordinates - # This should be aligned with coordinates' dimension order # The size of this kernel is used to figure out the expanded size self._full_kernel = self.get_full_kernel(coordinates) - shape = self._full_kernel.shape - if len(shape) != len(coordinates.shape): - raise ValueError("Kernel shape does not match source data shape") + if len(self._full_kernel.shape) != len(coordinates.shape): + raise ValueError("shape mismatch, kernel does not match source data (%s != %s)" % ( + self._full_kernel.shape, coordinates.shape)) # expand the coordinates exp_coords = [] exp_slice = [] - for dim, s in zip(coordinates.dims, shape): + for dim, s in zip(coordinates.dims, self._full_kernel.shape): coord = coordinates[dim] if s == 1 or not isinstance(coord, UniformCoordinates1d): exp_coords.append(coord) @@ -121,21 +120,25 @@ def eval(self, coordinates, output=None, method=None): coord.step, **coord.properties)) exp_slice.append(slice(-s_start, -s_end)) - exp_coords = Coordinates(exp_coords) exp_slice = tuple(exp_slice) + self._expanded_coordinates = Coordinates(exp_coords) + + # evaluate source using expanded coordinates, convolve, and then slice out original coordinates + self.outputs['source'] = self.source.eval(self._expanded_coordinates, method=method) + + if np.isnan(np.max(self.outputs['source'])): + method = 'direct' + else: + method = 'auto' + + result scipy.signal.convolve(self.outputs['source'], self._full_kernel, mode='same', method=method) + result = result[exp_slice] - # evaluate using expanded coordinates and then reduce down to originally requested coordinates - out = super(Convolution, self).eval(exp_coords, method=method) - result = out[exp_slice] if output is None: output = result else: output[:] = result - # debugging - self._expanded_coordinates = exp_coords - self._output = output - return output @tl.default('kernel') @@ 
-163,21 +166,6 @@ def get_full_kernel(self, coordinates): """ return self.kernel - def algorithm(self): - """Computes the convolution of the source and the kernel - - Returns - ------- - np.ndarray - Resultant array. - """ - if np.isnan(np.max(self.outputs['source'])): - method = 'direct' - else: method = 'auto' - res = scipy.signal.convolve(self.outputs['source'], self._full_kernel, mode='same', method=method) - return res - - class TimeConvolution(Convolution): """Specialized convolution node that computes temporal convolutions only. diff --git a/podpac/core/algorithm/stats.py b/podpac/core/algorithm/stats.py index 0b837fcc4..f5d1f2d5f 100644 --- a/podpac/core/algorithm/stats.py +++ b/podpac/core/algorithm/stats.py @@ -168,6 +168,7 @@ def iteroutputs(self, coordinates, method=None): yield self.source.eval(chunk, method=method) @common_doc(COMMON_DOC) + @cache_output def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. @@ -185,9 +186,8 @@ def eval(self, coordinates, output=None, method=None): {eval_return} """ - self._requested_coordinates = coordinates - self.dims = self.get_dims(self._requested_coordinates) - self._reduced_coordinates = self._requested_coordinates.drop(self.dims) + self.dims = self.get_dims(coordinates) + self._reduced_coordinates = coordinates.drop(self.dims) if output is None: output = self.create_output_array(self._reduced_coordinates) @@ -206,7 +206,6 @@ def eval(self, coordinates, output=None, method=None): else: output[:] = result - self._output = output return output def reduce(self, x): @@ -911,6 +910,7 @@ def _get_source_coordinates(self, requested_coordinates): return coords @common_doc(COMMON_DOC) + @cache_output def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. @@ -932,7 +932,7 @@ def eval(self, coordinates, output=None, method=None): ValueError If source it not time-depended (required by this node). 
""" - self._requested_coordinates = coordinates + self._source_coordinates = self._get_source_coordinates(coordinates) if output is None: @@ -956,7 +956,6 @@ def eval(self, coordinates, output=None, method=None): out = out.sel(**{self.groupby:E}).rename({self.groupby: 'time'}) output[:] = out.transpose(*output.dims).data - self._output = output return output def base_ref(self): diff --git a/podpac/core/compositor.py b/podpac/core/compositor.py index b97835301..3db794325 100644 --- a/podpac/core/compositor.py +++ b/podpac/core/compositor.py @@ -199,6 +199,7 @@ def f(src): output[:] = np.nan @common_doc(COMMON_DOC) + @node_eval def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. @@ -216,12 +217,8 @@ def eval(self, coordinates, output=None, method=None): {eval_return} """ - self._requested_coordinates = coordinates - outputs = self.iteroutputs(coordinates, method=method) output = self.composite(outputs, output) - - self._output = output return output def find_coordinates(self): diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 9363a8a6a..74d67aa05 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -183,7 +183,7 @@ class will be used without modication. 
# privates _interpolation = tl.Instance(Interpolation) - _evaluated_coordinates = tl.Instance(Coordinates, allow_none=True) + _original_requested_coordinates = tl.Instance(Coordinates, allow_none=True) _requested_source_coordinates = tl.Instance(Coordinates) _requested_source_coordinates_index = tl.List() _requested_source_data = tl.Instance(UnitsDataArray) @@ -238,7 +238,7 @@ def eval(self, coordinates, output=None, method=None): '`coordinate_index_type` is set to `numpy`', UserWarning) # store requested coordinates for debugging - self._requested_coordinates = coordinates + self._original_requested_coordinates = coordinates # check for missing dimensions for c in self.native_coordinates.values(): @@ -264,8 +264,10 @@ def eval(self, coordinates, output=None, method=None): extra.append(c.name) coordinates = coordinates.drop(extra) - self._evaluated_coordinates = coordinates # TODO move this if WCS can be updated to allow that + return self._eval(coordinates, output=output, method=None) + @node_eval + def _eval(self, coordinates, output=None, method=None): # intersect the native coordinates with requested coordinates # to get native coordinates within requested coordinates bounds # TODO: support coordinate_index_type parameter to define other index types @@ -278,8 +280,6 @@ def eval(self, coordinates, output=None, method=None): output = self.create_output_array(coordinates) else: output[:] = np.nan - - self._output = output return output # reset interpolation @@ -299,12 +299,6 @@ def eval(self, coordinates, output=None, method=None): output = self.create_output_array(coordinates) output = self._interpolate(coordinates, output) - # set the order of dims to be the same as that of requested_coordinates - # this is required in case the user supplied an output object with a different dims order - output = output.transpose(*coordinates.dims) - - self._output = output - return output def find_coordinates(self): diff --git a/podpac/core/data/types.py 
b/podpac/core/data/types.py index b062ef3fc..eab156d69 100644 --- a/podpac/core/data/types.py +++ b/podpac/core/data/types.py @@ -562,14 +562,14 @@ def native_coordinates(self): data wrangling for us... """ - # TODO update so that we don't rely on _evaluated_coordinates - if not self._evaluated_coordinates: + # TODO update so that we don't rely on _requested_coordinates if possible + if not self._requested_coordinates: return self.wcs_coordinates cs = [] for dim in self.wcs_coordinates.dims: - if dim in self._evaluated_coordinates.dims: - c = self._evaluated_coordinates[dim] + if dim in self._requested_coordinates.dims: + c = self._requested_coordinates[dim] if c.size == 1: cs.append(ArrayCoordinates1d(c.coordinates[0], name=dim)) elif isinstance(c, UniformCoordinates1d): diff --git a/podpac/core/node.py b/podpac/core/node.py index 14115c8c6..7a2d8b62b 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -93,6 +93,7 @@ class Node(tl.HasTraits): cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True) node_defaults = tl.Dict(allow_none=True) style = tl.Instance(Style) + debug = tl.Bool(False) # TODO replace with a setting @tl.default('style') def _style_default(self): @@ -101,6 +102,7 @@ def _style_default(self): # debugging _requested_coordinates = tl.Instance(Coordinates, allow_none=True) _output = tl.Instance(UnitsDataArray, allow_none=True) + _from_cache = tl.Bool(allow_none=True, default_value=None) # temporary messages @property @@ -677,4 +679,46 @@ def clear_disk_cache(self, attr='*', node_cache=False, all_cache=False): shutil.rmtree(self.cache_dir) else: for f in glob.glob(self.cache_path(attr)): - os.remove(f) \ No newline at end of file + os.remove(f) + +def node_eval(fn): + """ + Decorator for Node eval methods that handles caching and a user provided output argument. 
+ + fn : function + Node eval method to wrap + + Returns + ------- + wrapper : function + Wrapped node eval method + """ + + cache_key = 'output' + + def wrapper(self, coordinates, output=None): + if self.debug: + self._requested_coordinates = coordinates + + cache_coordinates = coordinates.transform(sorted(coordinates.dims)) # order agnostic caching + if self.has_cache(key, cache_coordinates): + data = self.get_cache(key, cache_coordinates) + if output is not None: + order = [dim for dim in output.dims if dim not in data.dims] + list(data.dims) + output.transpose(*order)[:] = data + self._from_cache = True + else: + data = fn(self, coordinates, output=output,) + self.put_cache(key, data, cache_coordinates) + self._from_cache = False + + # transpose data to match the dims order of the requested coordinates + order = [dim for dim in coordinates.dims if dim in data.dims] + data = data.transpose(*order) + + if self.debug: + self._output = data + + return data + + return wrapper \ No newline at end of file From 403c4550d27ab96d3fe8854e55cffd43c4b61bbd Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 07:30:58 -0500 Subject: [PATCH 02/25] Renaming cache_output to node_eval and adding imports. --- podpac/core/algorithm/coord_select.py | 4 ++-- podpac/core/algorithm/signal.py | 4 ++-- podpac/core/algorithm/stats.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/podpac/core/algorithm/coord_select.py b/podpac/core/algorithm/coord_select.py index 0bcbbcee1..d08c2465f 100644 --- a/podpac/core/algorithm/coord_select.py +++ b/podpac/core/algorithm/coord_select.py @@ -86,7 +86,7 @@ def eval(self, coordinates, output=None, method=None): class ExpandCoordinates(ModifyCoordinates): """Algorithm node used to expand requested coordinates. This is normally used in conjunction with a reduce operation - to calculate, for example, the average temperature over the last month. 
While this is simple to do when uating + to calculate, for example, the average temperature over the last month. While this is simple to do when evaluating a single node (just provide the coordinates), this functionality is needed for nodes buried deeper in a pipeline. lat, lon, time, alt : List @@ -194,4 +194,4 @@ def get_modified_coordinates1d(self, coords, dim): else: raise ValueError("Invalid selection attrs for '%s'" % dim) - return coords1d \ No newline at end of file + return coords1d diff --git a/podpac/core/algorithm/signal.py b/podpac/core/algorithm/signal.py index cdd440f35..56764cb82 100644 --- a/podpac/core/algorithm/signal.py +++ b/podpac/core/algorithm/signal.py @@ -21,7 +21,7 @@ from podpac.core.node import Node from podpac.core.algorithm.algorithm import Algorithm from podpac.core.utils import common_doc -from podpac.core.node import COMMON_NODE_DOC +from podpac.core.node import COMMON_NODE_DOC, node_eval COMMON_DOC = COMMON_NODE_DOC.copy() COMMON_DOC['full_kernel'] = '''Kernel that contains all the dimensions of the input source, in the correct order. @@ -75,7 +75,7 @@ class Convolution(Algorithm): _full_kernel = tl.Instance(np.ndarray) @common_doc(COMMON_DOC) - @cache_output + @node_eval def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. 
diff --git a/podpac/core/algorithm/stats.py b/podpac/core/algorithm/stats.py index f5d1f2d5f..fe9cb6c2e 100644 --- a/podpac/core/algorithm/stats.py +++ b/podpac/core/algorithm/stats.py @@ -18,7 +18,7 @@ from podpac.core.node import Node from podpac.core.algorithm.algorithm import Algorithm from podpac.core.utils import common_doc -from podpac.core.node import COMMON_NODE_DOC +from podpac.core.node import COMMON_NODE_DOC, node_eval COMMON_DOC = COMMON_NODE_DOC.copy() @@ -168,7 +168,7 @@ def iteroutputs(self, coordinates, method=None): yield self.source.eval(chunk, method=method) @common_doc(COMMON_DOC) - @cache_output + @node_eval def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. @@ -910,7 +910,7 @@ def _get_source_coordinates(self, requested_coordinates): return coords @common_doc(COMMON_DOC) - @cache_output + @node_eval def eval(self, coordinates, output=None, method=None): """Evaluates this nodes using the supplied coordinates. From 4d90ca2da650df75c791edf5cc5ddc23d6571332 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 07:48:42 -0500 Subject: [PATCH 03/25] BUGFIX: Fixing typos and missing imports. * ALSO, adding functools.wraps to decorators so that the __name__ and __doc__ of the decorated function is preserved. 
--- podpac/core/algorithm/algorithm.py | 1 + podpac/core/algorithm/coord_select.py | 2 +- podpac/core/algorithm/signal.py | 2 +- podpac/core/compositor.py | 1 + podpac/core/data/datasource.py | 1 + podpac/core/node.py | 4 +++- podpac/core/utils.py | 4 +++- 7 files changed, 11 insertions(+), 4 deletions(-) diff --git a/podpac/core/algorithm/algorithm.py b/podpac/core/algorithm/algorithm.py index fec1f2bde..1e02d34fb 100644 --- a/podpac/core/algorithm/algorithm.py +++ b/podpac/core/algorithm/algorithm.py @@ -19,6 +19,7 @@ from podpac.core.units import UnitsDataArray from podpac.core.node import Node from podpac.core.node import COMMON_NODE_DOC +from podpac.core.node import node_eval from podpac.core.utils import common_doc COMMON_DOC = COMMON_NODE_DOC.copy() diff --git a/podpac/core/algorithm/coord_select.py b/podpac/core/algorithm/coord_select.py index d08c2465f..c65555113 100644 --- a/podpac/core/algorithm/coord_select.py +++ b/podpac/core/algorithm/coord_select.py @@ -76,7 +76,7 @@ def eval(self, coordinates, output=None, method=None): self.outputs['source'] = self.source.eval(self._modified_coordinates, output=output, method=method) - if output = None: + if output is None: output = self.outputs['source'] else: output[:] = self.outputs['source'] diff --git a/podpac/core/algorithm/signal.py b/podpac/core/algorithm/signal.py index 56764cb82..c8297d4db 100644 --- a/podpac/core/algorithm/signal.py +++ b/podpac/core/algorithm/signal.py @@ -131,7 +131,7 @@ def eval(self, coordinates, output=None, method=None): else: method = 'auto' - result scipy.signal.convolve(self.outputs['source'], self._full_kernel, mode='same', method=method) + result = scipy.signal.convolve(self.outputs['source'], self._full_kernel, mode='same', method=method) result = result[exp_slice] if output is None: diff --git a/podpac/core/compositor.py b/podpac/core/compositor.py index 3db794325..cacc21303 100644 --- a/podpac/core/compositor.py +++ b/podpac/core/compositor.py @@ -14,6 +14,7 @@ from 
podpac.core.node import Node from podpac.core.utils import common_doc from podpac.core.node import COMMON_NODE_DOC +from podpac.core.node import node_eval COMMON_DOC = COMMON_NODE_DOC.copy() diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 74d67aa05..e3410e78c 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -35,6 +35,7 @@ from podpac.core.node import Node from podpac.core.utils import common_doc, trait_is_defined from podpac.core.node import COMMON_NODE_DOC +from podpac.core.node import node_eval from podpac.core.data.interpolate import (Interpolation, Interpolator, NearestNeighbor, INTERPOLATION_SHORTCUTS, INTERPOLATION_DEFAULT) diff --git a/podpac/core/node.py b/podpac/core/node.py index 7a2d8b62b..10b7434b5 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -6,6 +6,7 @@ import os from collections import OrderedDict +import functools import json import numpy as np import traitlets as tl @@ -696,6 +697,7 @@ def node_eval(fn): cache_key = 'output' + @functools.wraps(fn) def wrapper(self, coordinates, output=None): if self.debug: self._requested_coordinates = coordinates @@ -721,4 +723,4 @@ def wrapper(self, coordinates, output=None): return data - return wrapper \ No newline at end of file + return wrapper diff --git a/podpac/core/utils.py b/podpac/core/utils.py index 19e8780e3..58104e093 100644 --- a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -6,6 +6,7 @@ import os import json +import functools import traitlets as tl import numpy as np @@ -41,6 +42,7 @@ def cached_property(func): """ @property + @functools.wraps(func) def f(self): """Summary @@ -173,4 +175,4 @@ def trait_is_defined(obj, trait): val = obj._trait_values[trait] return val is not None except (KeyError, RuntimeError, tl.TraitError): - return False \ No newline at end of file + return False From 38445bb356351545dbce6a10dfeeffbbec5a0da0 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 09:32:42 
-0500 Subject: [PATCH 04/25] BUGFIX: transform --> transpose. Also adding DEBUG to settings.py. --- podpac/core/node.py | 4 ++-- podpac/settings.py | 13 +++++-------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/podpac/core/node.py b/podpac/core/node.py index 20919c857..7e2fb6a42 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -99,7 +99,7 @@ class Node(tl.HasTraits): cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True) node_defaults = tl.Dict(allow_none=True) style = tl.Instance(Style) - debug = tl.Bool(False) # TODO replace with a setting + debug = tl.Bool(settings.DEBUG) # TODO replace with a setting @tl.default('style') def _style_default(self): @@ -773,7 +773,7 @@ def wrapper(self, coordinates, output=None): if self.debug: self._requested_coordinates = coordinates - cache_coordinates = coordinates.transform(sorted(coordinates.dims)) # order agnostic caching + cache_coordinates = coordinates.transpose(sorted(coordinates.dims)) # order agnostic caching if self.has_cache(key, cache_coordinates): data = self.get_cache(key, cache_coordinates) if output is not None: diff --git a/podpac/settings.py b/podpac/settings.py index 9fbebce48..0bd9ba661 100644 --- a/podpac/settings.py +++ b/podpac/settings.py @@ -24,8 +24,10 @@ import os +DEBUG = True + CACHE_TO_S3 = False -ROOT_PATH = None +ROOT_PATH = os.path.expanduser('~') # Some settings for testing AWS Lambda function handlers locally AWS_ACCESS_KEY_ID = None AWS_SECRET_ACCESS_KEY = None @@ -38,10 +40,5 @@ if S3_BUCKET_NAME and CACHE_TO_S3: CACHE_DIR = 'cache' else: - if ROOT_PATH: - CACHE_DIR = os.path.abspath(os.path.join(ROOT_PATH, 'cache')) - else: - CACHE_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'cache')) - - if not os.path.exists(CACHE_DIR): - os.makedirs(CACHE_DIR) + CACHE_DIR = os.path.abspath(os.path.join(ROOT_PATH, 'cache')) + os.makedirs(CACHE_DIR, exist_ok=True) From 26d59c03b37a10c2cc48d63177d931ad79cb3227 Mon Sep 17 00:00:00 2001 From: Matt 
Ueckermann Date: Tue, 27 Nov 2018 10:29:38 -0500 Subject: [PATCH 05/25] BUGFIX: Fixing typos and wrong syntax. --- podpac/core/node.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/podpac/core/node.py b/podpac/core/node.py index 7e2fb6a42..d099f010a 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -772,8 +772,8 @@ def node_eval(fn): def wrapper(self, coordinates, output=None): if self.debug: self._requested_coordinates = coordinates - - cache_coordinates = coordinates.transpose(sorted(coordinates.dims)) # order agnostic caching + key = 'output' + cache_coordinates = coordinates.transpose(*sorted(coordinates.dims)) # order agnostic caching if self.has_cache(key, cache_coordinates): data = self.get_cache(key, cache_coordinates) if output is not None: From 9268a7639a119af449930e426ea5c6028858adc9 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 10:44:05 -0500 Subject: [PATCH 06/25] BUGFIX: Removing the 'method' parameter --- podpac/core/data/datasource.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 3912f47ec..c7fec9445 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -301,7 +301,7 @@ def eval(self, coordinates, output=None): # set input coordinates to evaluated coordinates # TODO move this if WCS can be updated to support self._evaluated_coordinates = coordinates - return self._eval(coordinates, output=output, method=None) + return self._eval(coordinates, output=output) @node_eval def _eval(self, coordinates, output=None): From 66250253b7e63288167a9c2947044b18d604945c Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 13:17:17 -0500 Subject: [PATCH 07/25] BUGFIXES: Removed a few more methods. Had to rework part of the algorithm calls. Somewhat unexpected. 
--- podpac/core/algorithm/algorithm.py | 10 +++++++++- podpac/core/algorithm/coord_select.py | 2 +- podpac/core/algorithm/signal.py | 13 +++++-------- podpac/core/coordinates/coordinates.py | 5 +++-- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/podpac/core/algorithm/algorithm.py b/podpac/core/algorithm/algorithm.py index a1e6bf612..72d46bba3 100644 --- a/podpac/core/algorithm/algorithm.py +++ b/podpac/core/algorithm/algorithm.py @@ -20,6 +20,7 @@ from podpac.core.coordinates import Coordinates, union from podpac.core.units import UnitsDataArray from podpac.core.node import Node +from podpac.core.node import NodeException from podpac.core.node import COMMON_NODE_DOC from podpac.core.node import node_eval from podpac.core.utils import common_doc @@ -84,11 +85,18 @@ def eval(self, coordinates, output=None): output = self.create_output_array(output_coordinates, data=result) else: output.data[:] = result - else: + elif isinstance(result, xr.DataArray): + if output is None: + output = self.create_output_array(Coordinates.from_xarray(result.coords), data=result.data) + else: + output[:] = result.data + elif isinstance(result, UnitsDataArray): if output is None: output = result else: output[:] = result + else: + raise NodeException return output diff --git a/podpac/core/algorithm/coord_select.py b/podpac/core/algorithm/coord_select.py index d86cafffc..fcdcfd7c4 100644 --- a/podpac/core/algorithm/coord_select.py +++ b/podpac/core/algorithm/coord_select.py @@ -72,7 +72,7 @@ def eval(self, coordinates, output=None): if self._modified_coordinates[dim].size == 0: raise ValueError("Modified coordinates do not intersect with source data (dim '%s')" % dim) - self.outputs['source'] = self.source.eval(self._modified_coordinates, output=output, method=method) + self.outputs['source'] = self.source.eval(self._modified_coordinates, output=output) if output is None: output = self.outputs['source'] diff --git a/podpac/core/algorithm/signal.py 
b/podpac/core/algorithm/signal.py index 5357df92c..ee2fc2d82 100644 --- a/podpac/core/algorithm/signal.py +++ b/podpac/core/algorithm/signal.py @@ -124,21 +124,18 @@ def eval(self, coordinates, output=None): self._expanded_coordinates = Coordinates(exp_coords) # evaluate source using expanded coordinates, convolve, and then slice out original coordinates - self.outputs['source'] = self.source.eval(self._expanded_coordinates, method=method) + source = self.source.eval(self._expanded_coordinates) - if np.isnan(np.max(self.outputs['source'])): + if np.isnan(np.max(source)): method = 'direct' else: method = 'auto' - result = scipy.signal.convolve(self.outputs['source'], self._full_kernel, mode='same', method=method) + result = scipy.signal.convolve(source, self._full_kernel, mode='same', method=method) result = result[exp_slice] - - # evaluate using expanded coordinates and then reduce down to originally requested coordinates - out = super(Convolution, self).eval(exp_coords) - result = out[exp_slice] + if output is None: - output = result + output = self.create_output_array(coordinates, data=result) else: output[:] = result diff --git a/podpac/core/coordinates/coordinates.py b/podpac/core/coordinates/coordinates.py index 6c6212114..929071f76 100644 --- a/podpac/core/coordinates/coordinates.py +++ b/podpac/core/coordinates/coordinates.py @@ -638,7 +638,8 @@ def properties(self): # 'coord_ref_sys': self.coord_ref_sys, # 'ctype': self.ctype # } - + if len(self.udims) == 0: + return {} c = self[self.udims[0]] return { 'coord_ref_sys': c.coord_ref_sys, @@ -1024,4 +1025,4 @@ def union(coords_list): :class:`concat` """ - return concat(coords_list).unique() \ No newline at end of file + return concat(coords_list).unique() From ef975a7abc4e8d80376534da6d34f650f6763607 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 13:27:19 -0500 Subject: [PATCH 08/25] ENH: Making the ROOT_PATH for podpac storage to be ~/.podpac --- podpac/settings.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/settings.py b/podpac/settings.py index 0bd9ba661..8fe27821e 100644 --- a/podpac/settings.py +++ b/podpac/settings.py @@ -27,7 +27,7 @@ DEBUG = True CACHE_TO_S3 = False -ROOT_PATH = os.path.expanduser('~') +ROOT_PATH = os.path.join(os.path.expanduser('~'), '.podpac') # Some settings for testing AWS Lambda function handlers locally AWS_ACCESS_KEY_ID = None AWS_SECRET_ACCESS_KEY = None From 6646db5b35786a3ee65cddaf63e82440f5c5c953 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 15:08:04 -0500 Subject: [PATCH 09/25] ENH: Initial caching working. * Note: I had to use hashlib because python's hash function uses a random seed. That meant the hashes were not preserved between executions. * Fixed lots of typos etc. --- podpac/core/cache/cache.py | 5 +++-- podpac/core/node.py | 39 ++++++++++++++++++++++++++++++-------- 2 files changed, 34 insertions(+), 10 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index 18a3393be..bd344dedb 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -7,6 +7,7 @@ import os from glob import glob import shutil +from hashlib import md5 as hash_alg try: import cPickle # Python 2.7 @@ -140,10 +141,10 @@ def has(self, node, key, coordinates=None, mode=None): class CacheStore(object): def get_hash_val(self, obj): - return hash(obj) + return hash_alg(obj.encode('utf-8')).hexdigest() def hash_node(self, node): - hashable_repr = cPickle.dumps(node.definition) + hashable_repr = node.json return self.get_hash_val(hashable_repr) def hash_coordinates(self, coordinates): diff --git a/podpac/core/node.py b/podpac/core/node.py index d099f010a..b0bc14508 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -17,6 +17,7 @@ from podpac.core.utils import common_doc from podpac.core.coordinates import Coordinates from podpac.core.style import Style +from podpac.core.cache import cache COMMON_NODE_DOC = { 
'requested_coordinates': @@ -77,6 +78,8 @@ class Node(tl.HasTraits): ---------- cache_type : [None, 'disk', 'ram'] How the output of the nodes should be cached. By default, outputs are not cached. + cache_ctrl: :class:`podpac.core.cache.cache.CacheCtrl` + Class that controls caching. If not provided, uses default based on cache_type. dtype : type The numpy datatype of the output. Currently only ``float`` is supported. node_defaults : dict @@ -97,14 +100,32 @@ class Node(tl.HasTraits): units = Units(default_value=None, allow_none=True) dtype = tl.Any(default_value=float) cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True) + cache_ctrl = tl.Instance(cache.CacheCtrl, allow_none=True) + + @tl.default('cache_ctrl') + def _cache_ctrl_default(self): + if self.cache_type is None: + return None + elif self.cache_type == 'ram': + raise NotImplementedError('Cachetype RAM has not been implemented') + elif self.cache_type == 'disk': + store = cache.DiskCacheStore(root_cache_dir_path=settings.CACHE_DIR) + ctrl = cache.CacheCtrl(cache_stores=[store]) + + return ctrl + @tl.observe('cache_type') + def _cache_type_changed(self, change): + self.cache_ctrl = self._cache_ctrl_default() + node_defaults = tl.Dict(allow_none=True) style = tl.Instance(Style) - debug = tl.Bool(settings.DEBUG) # TODO replace with a setting @tl.default('style') def _style_default(self): return Style() + debug = tl.Bool(settings.DEBUG) # TODO replace with a setting + # debugging _requested_coordinates = tl.Instance(Coordinates, allow_none=True) _output = tl.Instance(UnitsDataArray, allow_none=True) @@ -429,7 +450,7 @@ def get_cache(self, key, coordinates=None): if not self.has_cache(key, coordinates=coordinates): raise NodeException("cached data not found for key '%s' and cooordinates %s" % (key, coordinates)) - # return cache.get(self, data, key, coordinates=coordinates) + return self.cache_ctrl.get(self, key, coordinates=coordinates) def put_cache(self, data, key, coordinates=None, overwrite=False): 
""" @@ -454,8 +475,10 @@ def put_cache(self, data, key, coordinates=None, overwrite=False): if not overwrite and self.has_cache(key, coordinates=coordinates): raise NodeException("Cached data already exists for key '%s' and coordinates %s" % (key, coordinates)) - - # cache.put(self, data, key, coordinates=coordinates, overwrite=overwrite) + if self.cache_ctrl is None: + return # Without raising an error? + + self.cache_ctrl.put(self, data, key, coordinates=coordinates, update=overwrite) def has_cache(self, key, coordinates=None): """ @@ -473,9 +496,9 @@ def has_cache(self, key, coordinates=None): bool True if there is cached data for this node, key, and coordinates. """ - - return False - # return cache.has(self, data, key, coordinates=coordinates) + if self.cache_ctrl is None: + return False + return self.cache_ctrl.has(self, key, coordinates=coordinates) def del_cache(self, key=None, coordinates=None): """ @@ -782,7 +805,7 @@ def wrapper(self, coordinates, output=None): self._from_cache = True else: data = fn(self, coordinates, output=output,) - self.put_cache(key, data, cache_coordinates) + self.put_cache(data, key, cache_coordinates) self._from_cache = False # transpose data to match the dims order of the requested coordinates From ae709ed20586b2d9bb83bbffaf249650078d214f Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 15:19:28 -0500 Subject: [PATCH 10/25] BUGFIX: Moving around where the utf-8 encoding happens so that None can be hashed. 
--- podpac/core/cache/cache.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index bd344dedb..421281dbd 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -141,18 +141,18 @@ def has(self, node, key, coordinates=None, mode=None): class CacheStore(object): def get_hash_val(self, obj): - return hash_alg(obj.encode('utf-8')).hexdigest() + return hash_alg(obj).hexdigest() def hash_node(self, node): - hashable_repr = node.json + hashable_repr = node.json.encode('utf-8') return self.get_hash_val(hashable_repr) def hash_coordinates(self, coordinates): - hashable_repr = None if coordinates is None else coordinates.json + hashable_repr = None if coordinates is None else coordinates.json.encode('utf-8') return self.get_hash_val(hashable_repr) def hash_key(self, key): - hashable_repr = str(repr(key)) + hashable_repr = str(repr(key)).encode('utf-8') return self.get_hash_val(hashable_repr) def put(self, node, data, key, coordinates=None, update=False): From be5125de5f73e395df1cab22b4096497254641dd Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 15:40:45 -0500 Subject: [PATCH 11/25] TESTFIX: hashlib doesn't hash None --- podpac/core/cache/cache.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index 421281dbd..487d68ef3 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -148,7 +148,7 @@ def hash_node(self, node): return self.get_hash_val(hashable_repr) def hash_coordinates(self, coordinates): - hashable_repr = None if coordinates is None else coordinates.json.encode('utf-8') + hashable_repr = 'None'.encode('utf-8') if coordinates is None else coordinates.json.encode('utf-8') return self.get_hash_val(hashable_repr) def hash_key(self, key): From 64c1043b955859ecd7f16238088621b035497e45 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 
16:01:44 -0500 Subject: [PATCH 12/25] BUGFIX: makedirs still throwing an error for the settings file. This should fix it for Python 3. Apparently Python 2 doesn't has exist_ok. --- podpac/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/utils.py b/podpac/core/utils.py index f6d86aeda..1a5e4a587 100644 --- a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -116,7 +116,7 @@ def save_setting(key, value, path=None): """ file = get_settings_file(path) if not os.path.exists(file): - os.makedirs(os.path.dirname(file)) + os.makedirs(os.path.dirname(file, exist_ok=True)) config = {} else: with open(file) as fid: From 3f72aab3e188f4cd43fa801089a4ab194aa7a121 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 16:35:16 -0500 Subject: [PATCH 13/25] BUGFIX: Typo on previous commit. Brackets. --- podpac/core/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/utils.py b/podpac/core/utils.py index 1a5e4a587..de5d8c499 100644 --- a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -116,7 +116,7 @@ def save_setting(key, value, path=None): """ file = get_settings_file(path) if not os.path.exists(file): - os.makedirs(os.path.dirname(file, exist_ok=True)) + os.makedirs(os.path.dirname(file), exist_ok=True) config = {} else: with open(file) as fid: From bc7aa979416e34bdd9acd6b924ff3b7fbfefc03d Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 17:46:12 -0500 Subject: [PATCH 14/25] BUGFIX: Fixing Python 2 with usage of os.makedirs(ok_exist) parameter. 
--- podpac/__init__.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/podpac/__init__.py b/podpac/__init__.py index ec4062d09..d13ae5b34 100644 --- a/podpac/__init__.py +++ b/podpac/__init__.py @@ -11,6 +11,27 @@ Description """ + +# Monkey match os.makedirs for Python 2 compatibility +import sys +import os +_osmakedirs = os.makedirs +def makedirs(name, mode=511, exist_ok=False): + try: + _osmakedirs(name, mode) + except Exception as e: + if exist_ok: + pass + else: + raise e +if sys.version_info.major == 2: + makedirs.__doc__ = os.makedirs.__doc__ + os.makedirs = makedirs +else: + del _osmakedirs +del os +del sys + # Public API from podpac.core.coordinates import Coordinates, crange, clinspace from podpac.core.node import Node, NodeException From b4965b294da16bc4eac9bf47b746d934bbd0ae17 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 17:47:14 -0500 Subject: [PATCH 15/25] ENH: Further stabilizing hashes for caching using hashlib instead of hash. 
--- podpac/core/cache/cache.py | 13 +++++++------ podpac/core/coordinates/coordinates.py | 3 ++- podpac/core/node.py | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index 487d68ef3..85f796e0c 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -144,16 +144,17 @@ def get_hash_val(self, obj): return hash_alg(obj).hexdigest() def hash_node(self, node): - hashable_repr = node.json.encode('utf-8') - return self.get_hash_val(hashable_repr) + hashable_repr = 'None' if node is None else node.hash + return hashable_repr def hash_coordinates(self, coordinates): - hashable_repr = 'None'.encode('utf-8') if coordinates is None else coordinates.json.encode('utf-8') - return self.get_hash_val(hashable_repr) + hashable_repr = 'None' if coordinates is None else coordinates.hash + return hashable_repr def hash_key(self, key): - hashable_repr = str(repr(key)).encode('utf-8') - return self.get_hash_val(hashable_repr) + #hashable_repr = str(repr(key)).encode('utf-8') + #return self.get_hash_val(hashable_repr) + return key def put(self, node, data, key, coordinates=None, update=False): '''Cache data for specified node. 
diff --git a/podpac/core/coordinates/coordinates.py b/podpac/core/coordinates/coordinates.py index 929071f76..098bbcce9 100644 --- a/podpac/core/coordinates/coordinates.py +++ b/podpac/core/coordinates/coordinates.py @@ -9,6 +9,7 @@ import itertools import json from collections import OrderedDict +from hashlib import md5 as hash_alg import numpy as np import traitlets as tl @@ -627,7 +628,7 @@ def hash(self): *Note: To be replaced with the __hash__ method.* """ - return hash(self.json) + return hash_alg(self.json.encode('utf-8')).hexdigest() @property def properties(self): diff --git a/podpac/core/node.py b/podpac/core/node.py index b0bc14508..02b146334 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -8,6 +8,7 @@ import re from collections import OrderedDict import functools +from hashlib import md5 as hash_alg import json import numpy as np import traitlets as tl @@ -419,7 +420,7 @@ def json_pretty(self): @property def hash(self): - return hash(self.json) + return hash_alg(self.json.encode('utf-8')).hexdigest() # ----------------------------------------------------------------------------------------------------------------- # Caching Interface @@ -795,7 +796,7 @@ def node_eval(fn): def wrapper(self, coordinates, output=None): if self.debug: self._requested_coordinates = coordinates - key = 'output' + key = cache_key cache_coordinates = coordinates.transpose(*sorted(coordinates.dims)) # order agnostic caching if self.has_cache(key, cache_coordinates): data = self.get_cache(key, cache_coordinates) From b373ceea777ae7827b7bd2c1f331f4dff2bf8493 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Tue, 27 Nov 2018 18:29:57 -0500 Subject: [PATCH 16/25] ENH: Making SMAP robust to offline usage. 
--- podpac/datalib/smap.py | 50 ++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 21 deletions(-) diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index 4783ca109..6c9d9841b 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -134,12 +134,16 @@ def _get_from_url(url, auth_session): auth_session: podpac.core.authentication.EarthDataSession Authenticated EDS session """ - r = auth_session.get(url) - if r.status_code != 200: - r = auth_session.get(url.replace('opendap/', '')) + try: + r = auth_session.get(url) if r.status_code != 200: - raise RuntimeError('HTTP error: <%d>\n' % (r.status_code) - + r.text[:256]) + r = auth_session.get(url.replace('opendap/', '')) + if r.status_code != 200: + raise RuntimeError('HTTP error: <%d>\n' % (r.status_code) + + r.text[:256]) + except requests.ConnectionError as e: + warnings.warn('WARNING: ' + str(e)) + r = None return r @@ -156,32 +160,30 @@ def _infer_SMAP_product_version(product, base_url, auth_session): auth_session: podpac.core.authentication.EarthDataSession Authenticated EDS session """ + r = _get_from_url(base_url, auth_session) - if r.status_code != 200: - r = auth_session.get(url.replace('opendap/', '')) - if r.status_code != 200: - raise RuntimeError('HTTP error: <%d>\n' % (r.status_code) - + r.text[:256]) - m = re.search(product, r.text) - return int(r.text[m.end() + 1: m.end() + 4]) + if r: + m = re.search(product, r.text) + return int(r.text[m.end() + 1: m.end() + 4]) + return int(SMAP_PRODUCT_MAP.sel(product=product, attr='default_version').item()) # NOTE: {rdk} will be substituted for the entry's 'rootdatakey' SMAP_PRODUCT_DICT = { - #'.ver': ['latkey', 'lonkey', 'rootdatakey', 'layerkey' - 'SPL4SMAU': ['cell_lat', 'cell_lon', 'Analysis_Data_', '{rdk}sm_surface_analysis'], - 'SPL4SMGP': ['cell_lat', 'cell_lon', 'Geophysical_Data_', '{rdk}sm_surface'], - 'SPL3SMA': ['{rdk}latitude', '{rdk}longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}soil_moisture'], - 
'SPL3SMAP': ['{rdk}latitude', '{rdk}longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}soil_moisture'], - 'SPL3SMP': ['{rdk}AM_latitude', '{rdk}AM_longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}_soil_moisture'], - 'SPL4SMLM': ['cell_lat', 'cell_lon', 'Land_Model_Constants_Data_', ''], - 'SPL2SMAP_S': ['{rdk}latitude_1km', '{rdk}longitude_1km', 'Soil_Moisture_Retrieval_Data_1km_', '{rdk}soil_moisture_1km'], + #'.ver': ['latkey', 'lonkey', 'rootdatakey', 'layerkey' 'default_verison' + 'SPL4SMAU': ['cell_lat', 'cell_lon', 'Analysis_Data_', '{rdk}sm_surface_analysis', 4], + 'SPL4SMGP': ['cell_lat', 'cell_lon', 'Geophysical_Data_', '{rdk}sm_surface', 4], + 'SPL3SMA': ['{rdk}latitude', '{rdk}longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}soil_moisture', 4], + 'SPL3SMAP': ['{rdk}latitude', '{rdk}longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}soil_moisture', 4], + 'SPL3SMP': ['{rdk}AM_latitude', '{rdk}AM_longitude', 'Soil_Moisture_Retrieval_Data_', '{rdk}_soil_moisture', 4], + 'SPL4SMLM': ['cell_lat', 'cell_lon', 'Land_Model_Constants_Data_', '', 4], + 'SPL2SMAP_S': ['{rdk}latitude_1km', '{rdk}longitude_1km', 'Soil_Moisture_Retrieval_Data_1km_', '{rdk}soil_moisture_1km', 4], } SMAP_PRODUCT_MAP = xr.DataArray(list(SMAP_PRODUCT_DICT.values()), dims=['product', 'attr'], coords={'product': list(SMAP_PRODUCT_DICT.keys()), - 'attr':['latkey', 'lonkey', 'rootdatakey', 'layerkey'] + 'attr':['latkey', 'lonkey', 'rootdatakey', 'layerkey', 'default_version'] } ) @@ -419,6 +421,8 @@ def _property_source_default(self): url = SMAP_BASE_URL + \ '/SPL4SMLM.%03d/2015.03.31/' % (v) r = _get_from_url(url, self.auth_session) + if not r: + return 'None' n = self.file_url_re.search(r.text).group() return url + n @@ -664,6 +668,8 @@ def get_available_coords_sources(self): """ url = self.source r = _get_from_url(url, self.auth_session) + if r is None: + return np.array([]), None, np.array([]) soup = bs4.BeautifulSoup(r.text, 'lxml') a = soup.find_all('a') file_regex = self.file_url_re 
@@ -848,6 +854,8 @@ def get_available_times_dates(self): """ url = '/'.join([self.base_url, '%s.%03d' % (self.product, self.version)]) r = _get_from_url(url, self.auth_session) + if r is None: + return np.array([]), [] soup = bs4.BeautifulSoup(r.text, 'lxml') a = soup.find_all('a') regex = self.date_url_re From 8874dfe2568e8ae73220427047735f2d00f0b4cd Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 08:11:23 -0500 Subject: [PATCH 17/25] BUG: Fixing various bugs for the downscaling demo. * Compositor wasn't passing along the interpolation attribute * WCS was making the wrong coordinates when time wasn't available * SMAP throws a RUNTIME error when there is a connection, but the password is wrong. We now catch this and print a warning instead of failing catastrophically. * Fixing the layerkey attribute of the SMAPProperties node. This either changed in SMAP or has been wrong for a while. --- podpac/core/compositor.py | 2 +- podpac/core/data/types.py | 5 ++++- podpac/datalib/smap.py | 4 +++- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/podpac/core/compositor.py b/podpac/core/compositor.py index 2a4705906..6c8935f95 100644 --- a/podpac/core/compositor.py +++ b/podpac/core/compositor.py @@ -166,7 +166,7 @@ def iteroutputs(self, coordinates): # Set the interpolation properties for sources if self.interpolation: for s in src_subset.ravel(): - if trait_is_defined(s, 'interpolation'): + if trait_is_defined(self, 'interpolation'): s.interpolation = self.interpolation # Optimization: if coordinates complete and source coords is 1D, diff --git a/podpac/core/data/types.py b/podpac/core/data/types.py index 2090ac753..1ef12af39 100644 --- a/podpac/core/data/types.py +++ b/podpac/core/data/types.py @@ -776,7 +776,10 @@ def get_wcs_coordinates(self): timedomain = capabilities.find("wcs:temporaldomain") if timedomain is None: - return Coordinates([UniformCoordinates1d(top, bottom, size=size[1], name='lat')]) + return Coordinates([ + 
UniformCoordinates1d(top, bottom, size=size[1], name='lat'), + UniformCoordinates1d(left, right, size=size[0], name='lon') + ]) date_re = re.compile('[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}') times = str(timedomain).replace('', '').replace('', '').split('\n') diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index 6c9d9841b..75358635a 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -144,6 +144,8 @@ def _get_from_url(url, auth_session): except requests.ConnectionError as e: warnings.warn('WARNING: ' + str(e)) r = None + except RuntimeError as e: + warnings.warn('WARNING: ' + str(e)) return r @@ -439,7 +441,7 @@ def _property_source_default(self): @tl.default('layerkey') def _layerkey_default(self): - return self.property + return '{rdk}' + self.property @common_doc(COMMON_DOC) def get_native_coordinates(self): From 5b4af0f9e93ba1a5c5a9fd949c46980b43eb4ea8 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 13:50:16 -0500 Subject: [PATCH 18/25] ENH: Removing Deprecated caching methods from Node. * For unit-test to work, had to do some sanitization of directory name -- need to do a better job here in cache.py * Added the 'raise_no_cache_exception' parameter to put_cache -- this is so that nodes will execute fine and work properly even when no cache is available, but throw an explicit error when caching is absolutely required. * Add the 'mode' and 'all_cache' parameters to 'rem_cache' (which was also renamed from 'del_cache') * Added cache_update and cache_output attributes to Node. The first will update the cache from the source. The second will optionally cache node outputs, ignoring the node_eval decorator caching. * Added the cached_property decorator. It can track dependencies. However, tagged attributes are automatically tracked through dependencies because the key based on the node definition changes! 
* Fixed/added unit tests * Removed deprecated unit tests * Updated SMAP to use the new caching interface. SMAP uses cache_type='disk' by default * Added some better error messages/warnings when connectivity is not available. --- podpac/core/cache/cache.py | 7 +- podpac/core/node.py | 395 +++++++++++---------------------- podpac/core/test/test_node.py | 403 ++++++++++++++++++++-------------- podpac/datalib/smap.py | 88 ++++---- 4 files changed, 416 insertions(+), 477 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index 85f796e0c..cc31c4626 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -316,7 +316,8 @@ def cache_dir(self, node): basedir = self._root_dir_path subdir = str(node.__class__)[8:-2].split('.') dirs = [basedir] + subdir - return os.path.join(*dirs) + # TODO: Use a function to sanitize directory name + return os.path.join(*dirs).replace('<', '').replace('>', '') def cache_filename(self, node, key, coordinates): pre = str(node.base_ref).replace('/', '_').replace('\\', '_').replace(':', '_') @@ -346,6 +347,7 @@ def cleanse_filename_str(self, s): return s def put(self, node, data, key, coordinates=None, update=False): + self.make_cache_dir(node) listing = CacheListing(node=node, key=key, coordinates=coordinates, data=data) if self.has(node, key, coordinates): # a little inefficient but will do for now @@ -427,4 +429,5 @@ def has(self, node, key, coordinates=None): if c.has(listing): return True return False - + + diff --git a/podpac/core/node.py b/podpac/core/node.py index 02b146334..9fcfb4351 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -77,6 +77,10 @@ class Node(tl.HasTraits): Attributes ---------- + cache_output: bool + Default if True. Should the node's output be cached? + cache_update: bool + Default is False. Should the node's cached output be updated from the source data? cache_type : [None, 'disk', 'ram'] How the output of the nodes should be cached. 
By default, outputs are not cached. cache_ctrl: :class:`podpac.core.cache.cache.CacheCtrl` @@ -100,6 +104,8 @@ class Node(tl.HasTraits): units = Units(default_value=None, allow_none=True) dtype = tl.Any(default_value=float) + cache_output = tl.Bool(True) + cache_update = tl.Bool(False) cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True) cache_ctrl = tl.Instance(cache.CacheCtrl, allow_none=True) @@ -453,7 +459,7 @@ def get_cache(self, key, coordinates=None): return self.cache_ctrl.get(self, key, coordinates=coordinates) - def put_cache(self, data, key, coordinates=None, overwrite=False): + def put_cache(self, data, key, coordinates=None, overwrite=False, raise_no_cache_exception=True): """ Cache data for this node. @@ -465,19 +471,26 @@ def put_cache(self, data, key, coordinates=None, overwrite=False): Unique key for the data, e.g. 'output' coordinates : podpac.Coordinates, optional Coordinates that the cached data depends on. Omit for coordinate-independent data. - overwrite : bool + overwrite : bool, optional Overwrite existing data, default False + raise_no_cache_exception: bool, optional + Raises a NodeException if trying to put data to the cache, but no cache is available. Raises ------ NodeException Cached data already exists (and overwrite is False) + NodeException + No cache_ctrl available and raise_no_cache_exception is True """ if not overwrite and self.has_cache(key, coordinates=coordinates): raise NodeException("Cached data already exists for key '%s' and coordinates %s" % (key, coordinates)) if self.cache_ctrl is None: - return # Without raising an error? + if raise_no_cache_exception: + raise NodeException('Trying to cache data but no cache_ctrl available. 
Specify cache_type.') + else: + return self.cache_ctrl.put(self, data, key, coordinates=coordinates, update=overwrite) @@ -501,7 +514,7 @@ def has_cache(self, key, coordinates=None): return False return self.cache_ctrl.has(self, key, coordinates=coordinates) - def del_cache(self, key=None, coordinates=None): + def rem_cache(self, key=None, coordinates=None, mode=None, all_cache=False): """ Clear cached data for this node. @@ -512,271 +525,27 @@ def del_cache(self, key=None, coordinates=None): coordinates : podpac.Coordinates, optional Delete cached objects for these coordinates. If None, cached data is deleted for all coordinates, including coordinate-independent data. - """ - - pass - # return cache.rem(self, data, key, coordinates=coordinates) - - # ----------------------------------------------------------------------------------------------------------------- - # Deprecated methods - # ----------------------------------------------------------------------------------------------------------------- - - def _get_filename(self, name, coordinates): - return '%s_%s_%s' % (name, self.hash, coordinates.hash) - - def _get_output_path(self, outdir=None): - if outdir is None: - outdir = settings.CACHE_DIR - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - def write(self, name, outdir=None, fmt='pickle'): - """Write the most recent evaluation output to disk using the specified format - - Parameters - ---------- - name : str - Name of the file prefix. The final filename will have __. - outdir : None, optional - {outdir} - fmt : str - Output format, default 'pickle' - - Returns - -------- - str - The path of the loaded file - - Raises - ------ - NotImplementedError - format not yet implemented - ValueError - invalid format - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. 
- """ - - import warnings - warnings.warn('Node.write will be removed in a later release', DeprecationWarning) - - try: - import cPickle # Python 2.7 - except: - import _pickle as cPickle - - coordinates = self._requested_coordinates - path = os.path.join(self._get_output_path(outdir=outdir), self._get_filename(name, coordinates=coordinates)) - - if fmt == 'pickle': - path = '%s.pkl' % path - with open(path, 'wb') as f: - cPickle.dump(self._output, f) - elif fmt == 'png': - raise NotImplementedError("format '%s' not yet implemented" % fmt) - elif fmt == 'geotif': - raise NotImplementedError("format '%s' not yet implemented" % fmt) - else: - raise ValueError("invalid format, '%s' not recognized" % fmt) - - return path - - def load(self, name, coordinates, outdir=None): - """Retrieves cached output from disk as though the node has been evaluated + mode: str, optional + Specify which cache stores are affected. + all_cache: bool, optional + Default is False. If True, deletes all of the cache. - Parameters - ---------- - name : str - Name of the file prefix. - coordinates : podpac.Coordinates - {requested_coordinates} - outdir : str, optional - {outdir} - - Returns - -------- - str - The path of the loaded file - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. - """ - - import warnings - warnings.warn('Node.load will be removed in a later release', DeprecationWarning) - - try: - import cPickle # Python 2.7 - except: - import _pickle as cPickle - - path = os.path.join(self._get_output_path(outdir=outdir), self._get_filename(name, coordinates=coordinates)) - path = '%s.pkl' % path # assumes pickle - with open(path, 'rb') as f: - self._output = cPickle.load(f) - return path - - @property - def cache_dir(self): - """Return the directory used for caching - - Returns - ------- - str - Path to the default cache directory - - .. 
deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. + See Also + --------- + `podpac.core.cache.cache.CacheCtrl.rem` """ - - import warnings - warnings.warn('Node.cache_dir will be removed in a later release', DeprecationWarning) - - basedir = settings.CACHE_DIR - subdir = str(self.__class__)[8:-2].split('.') - dirs = [basedir] + subdir - return os.path.join(*dirs) - - def cache_path(self, filename): - """Return the cache path for the file - - Parameters - ---------- - filename : str - Name of the cached file - - Returns - ------- - str - Path to the cached file - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. - """ - - import warnings - warnings.warn('Node.cache_path will be removed in a later release', DeprecationWarning) - - pre = str(self.source).replace('/', '_').replace('\\', '_').replace(':', '_') - return os.path.join(self.cache_dir, pre + '_' + filename) - - def cache_obj(self, obj, filename): - """Cache the input object using the given filename - - Parameters - ---------- - obj : object - Object to be cached to disk - filename : str - File name for the object to be cached - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. 
- """ - - import warnings - warnings.warn('Node.cache_obj will be replaced by put_cache in a later release', DeprecationWarning) - - try: - import cPickle # Python 2.7 - except: - import _pickle as cPickle - - try: - import boto3 - except: - boto3 = None - - path = self.cache_path(filename) - if settings.S3_BUCKET_NAME is None or settings.CACHE_TO_S3 == False: - if not os.path.exists(self.cache_dir): - os.makedirs(self.cache_dir) - with open(path, 'wb') as fid: - cPickle.dump(obj, fid)#, protocol=cPickle.HIGHEST_PROTOCOL) - else: - s3 = boto3.resource('s3').Bucket(settings.S3_BUCKET_NAME) - io = BytesIO(cPickle.dumps(obj)) - s3.upload_fileobj(io, path) - - def load_cached_obj(self, filename): - """Retreive an object from cache - - Parameters - ---------- - filename : str - File name of object to be retrieved from cache - - Returns - ------- - object - Object loaded from cache - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. - """ - - import warnings - warnings.warn('Node.load_cached_obj will be replaced by get_cache in a later release', DeprecationWarning) - - try: - import cPickle # Python 2.7 - except: - import _pickle as cPickle - - try: - import boto3 - except: - boto3 = None - - path = self.cache_path(filename) - if settings.S3_BUCKET_NAME is None or not settings.CACHE_TO_S3: - with open(path, 'rb') as fid: - obj = cPickle.load(fid) - else: - s3 = boto3.resource('s3').Bucket(settings.S3_BUCKET_NAME) - io = BytesIO() - s3.download_fileobj(path, io) - io.seek(0) - obj = cPickle.loads(io.read()) - return obj - - def clear_disk_cache(self, attr='*', node_cache=False, all_cache=False): - """Helper function to clear disk cache. - - WARNING: This function will permanently delete cached values - - Parameters - ---------- - attr : str, optional - Default '*'. Specific attribute to be cleared for specific - instance of this Node. By default all attributes are cleared. 
- node_cache : bool, optional - Default False. If True, will ignore `attr` and clear all attributes - for all variants/instances of this Node. - all_cache : bool, optional - Default False. If True, will clear the entire podpac cache. - - .. deprecated:: 0.2.0 - This method will be removed and replaced by the caching module by version 0.2.0. - """ - - import warnings - warnings.warn('Node.clear_disk_cache will be replaced by del_cache in a later release', DeprecationWarning) - - import glob - import shutil - + if self.cache_ctrl is None: + return if all_cache: - shutil.rmtree(settings.CACHE_DIR) - elif node_cache: - shutil.rmtree(self.cache_dir) + self.cache_ctrl.rem() else: - for f in glob.glob(self.cache_path(attr)): - os.remove(f) + self.cache_ctrl.rem(self, key=key, coordinates=coordinates, mode=mode) +#--------------------------------------------------------# +# Decorators +#--------------------------------------------------------# + def node_eval(fn): """ Decorator for Node eval methods that handles caching and a user provided output argument. 
@@ -798,7 +567,7 @@ def wrapper(self, coordinates, output=None):
         self._requested_coordinates = coordinates
         key = cache_key
         cache_coordinates = coordinates.transpose(*sorted(coordinates.dims)) # order agnostic caching
-        if self.has_cache(key, cache_coordinates):
+        if self.has_cache(key, cache_coordinates) and not self.cache_update:
             data = self.get_cache(key, cache_coordinates)
             if output is not None:
                 order = [dim for dim in output.dims if dim not in data.dims] + list(data.dims)
@@ -806,7 +575,9 @@ def wrapper(self, coordinates, output=None):
             self._from_cache = True
         else:
             data = fn(self, coordinates, output=output,)
-            self.put_cache(data, key, cache_coordinates)
+            if self.cache_output:
+                self.put_cache(data, key, cache_coordinates, overwrite=self.cache_update,
+                               raise_no_cache_exception=False)
             self._from_cache = False
 
         # transpose data to match the dims order of the requested coordinates
@@ -817,5 +588,97 @@ def wrapper(self, coordinates, output=None):
         self._output = data
 
         return data
-
     return wrapper
+
+def cached_property(key, depends=None, raise_no_cache_exception=False):
+    """
+    Decorator for caching a function's output based on a key.
+
+    Parameters
+    -----------
+    key: str
+        Key used for caching.
+    depends: str, list, traitlets.All (optional)
+        Default is None. Any traits that the cached property depends on. The cached_property may NOT
+        change the value of any of these dependencies (this will result in a RecursionError)
+    raise_no_cache_exception: bool, optional
+        Raises a NodeException if trying to put data to the cache, but no cache is available.
+
+
+    Notes
+    -----
+    This decorator cannot handle function input parameters.
+
+    If the function uses any tagged attributes, these will essentially operate like dependencies
+    because the cache key changes based on the node definition, which is affected by tagged attributes.
+
+    Examples
+    ----------
+    >>> class MyClass(Node):
+            value = 0
+            @cached_property('value')
+            def get_value(self):
+                self.value += 1
+                return self.value
+            @cached_property('value_depends', depends='value')
+            def get_value_depends(self):
+                return self.value
+
+    >>> n = MyClass()
+    >>> n.get_value()  # The function as defined is called
+    1
+    >>> n.get_value()  # The function as defined is called again, since we have no caching specified
+    2
+    >>> n.cache_type = 'disk'
+    >>> n.get_value()  # The function as defined is called again, and the value is stored to disk
+    3
+    >>> n.get_value()  # The value is retrieved from disk, note the change in n.value is not captured
+    3
+    >>> n.get_value_depends()  # The function as defined is called, and the value is stored to disk
+    4
+    >>> n.get_value_depends()  # The value is retrieved from disk
+    4
+
+    >>> n.value += 1
+    >>> n.get_value_depends()  # The function as defined is called, and the value is stored to disk. Note the change in n.value is captured.
+    5
+    """
+    # This is the actual decorator which will be evaluated and returns the wrapped function
+    def cache_decorator(func):
+        # This is the initial wrapper that sets up the observations
+        @functools.wraps(func)
+        def cache_wrapper(self):
+            # This is the function that updates the cache based on observed traits
+            def cache_updator(change):
+                # print("Updating value on self:", id(self))
+                out = func(self)
+                self.put_cache(out, key, overwrite=True, raise_no_cache_exception=raise_no_cache_exception)
+
+            if depends:
+                # This sets up the observer on the dependent traits
+                # print ("setting up observer on self: ", id(self))
+                self.observe(cache_updator, depends)
+                # Since attributes could change on instantiation, anything we previously
+                # stored is likely out of date. So, force an update to the cache.
+                cache_updator(None)
+
+            # This is the final wrapper that continues to fetch data from cache
+            # after the observer has been set up.
+ @functools.wraps(func) + def cached_function(): + try: + out = self.get_cache(key) + except NodeException: + out = func(self) + self.put_cache(out, key, raise_no_cache_exception=raise_no_cache_exception) + return out + + # Since this is the first time the function is run, set the new wrapper + # on the class instance so that the current function won't be called again + # (which would set up an additional observer) + setattr(self, func.__name__, cached_function) + + # Return the value on the first run + return cached_function() + return cache_wrapper + return cache_decorator diff --git a/podpac/core/test/test_node.py b/podpac/core/test/test_node.py index 9fd11315c..6c9ec9627 100644 --- a/podpac/core/test/test_node.py +++ b/podpac/core/test/test_node.py @@ -215,208 +215,283 @@ def test_create_output_array_dtype(self): assert output.dtype == node.dtype assert np.all(~output) -@pytest.mark.skip("TODO") +# @pytest.mark.skip("TODO") class TestCaching(object): @classmethod def setup_class(cls): class MyNode(Node): pass - cls.node = MyNode() - cls.node.del_cache() + cls.node = MyNode(cache_type='disk') + cls.node.rem_cache() cls.coords = podpac.Coordinates([0, 0], dims=['lat', 'lon']) cls.coords2 = podpac.Coordinates([1, 1], dims=['lat', 'lon']) @classmethod def teardown_class(cls): - cls.node.del_cache() + cls.node.rem_cache() def setup_method(self, method): - self.node.del_cache() + self.node.rem_cache() def teardown_method(self, method): - self.node.del_cache() + self.node.rem_cache() - def test_has(self): - assert not self.node.has('test') + def test_has_cache(self): + assert not self.node.has_cache('test') - self.node.put(0, 'test') - assert self.node.has('test') - assert not self.node.has('test', coordinates=self.coords) + self.node.put_cache(0, 'test') + assert self.node.has_cache('test') + assert not self.node.has_cache('test', coordinates=self.coords) def test_has_coordinates(self): - assert not self.node.has('test', coordinates=self.coords) + assert not 
self.node.has_cache('test', coordinates=self.coords) - self.node.put(0, 'test', coordinates=self.coords) + self.node.put_cache(0, 'test', coordinates=self.coords) - assert not self.node.has('test') - assert self.node.has('test', coordinates=self.coords) - assert not self.node.has('test', coordinates=self.coords2) + assert not self.node.has_cache('test') + assert self.node.has_cache('test', coordinates=self.coords) + assert not self.node.has_cache('test', coordinates=self.coords2) - def test_get_put(self): + def test_get_put_cache(self): with pytest.raises(NodeException): - self.node.get('test') + self.node.get_cache('test') - self.node.put(0, 'test') - assert self.node.get('test') == 0 + self.node.put_cache(0, 'test') + assert self.node.get_cache('test') == 0 def test_get_put_coordinates(self): with pytest.raises(NodeException): - self.node.get('test') + self.node.get_cache('test') with pytest.raises(NodeException): - self.node.get('test', coordinates=self.coords) + self.node.get_cache('test', coordinates=self.coords) with pytest.raises(NodeException): - self.node.get('test', coordinates=self.coords2) + self.node.get_cache('test', coordinates=self.coords2) - self.node.put(0, 'test') - self.node.put(1, 'test', coordinates=self.coords) - self.node.put(2, 'test', coordinates=self.coords2) + self.node.put_cache(0, 'test') + self.node.put_cache(1, 'test', coordinates=self.coords) + self.node.put_cache(2, 'test', coordinates=self.coords2) - assert self.node.get('test') == 0 - assert self.node.get('test', coordinates=self.coords) == 1 - assert self.node.get('test', coordinates=self.coords2) == 2 + assert self.node.get_cache('test') == 0 + assert self.node.get_cache('test', coordinates=self.coords) == 1 + assert self.node.get_cache('test', coordinates=self.coords2) == 2 def test_put_overwrite(self): - self.node.put(0, 'test') - assert self.node.get('test') == 0 + self.node.put_cache(0, 'test') + assert self.node.get_cache('test') == 0 with pytest.raises(NodeException): - 
self.node.put('test', 1) - - self.node.put(1, 'test', overwrite=True) - assert self.node.get('test') == 1 - - def test_del_all(self): - self.node.put(0, 'a') - self.node.put(0, 'b') - self.node.put(0, 'a', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords2) - self.node.put(0, 'd', coordinates=self.coords) - - self.node.del_cache() - assert not self.has_cache('a') - assert not self.has_cache('b') - assert not self.has_cache('a', coordinates=self.coords) - assert not self.has_cache('c', coordinates=self.coords) - assert not self.has_cache('c', coordinates=self.coords2) - assert not self.has_cache('d', coordinates=self.coords) - - def test_del_key(self): - self.node.put(0, 'a') - self.node.put(0, 'b') - self.node.put(0, 'a', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords2) - self.node.put(0, 'd', coordinates=self.coords) - - self.node.del_cache(key='a') - - assert not self.has_cache('a') - assert not self.has_cache('a', coordinates=self.coords) - assert self.has_cache('b') - assert self.has_cache('c', coordinates=self.coords) - assert self.has_cache('c', coordinates=self.coords2) - assert self.has_cache('d', coordinates=self.coords) - - def test_del_coordinates(self): - self.node.put(0, 'a') - self.node.put(0, 'b') - self.node.put(0, 'a', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords2) - self.node.put(0, 'd', coordinates=self.coords) - - self.node.del_cache(coordinates=self.coords) - - assert self.has_cache('a') - assert not self.has_cache('a', coordinates=self.coords) - assert self.has_cache('b') - assert not self.has_cache('c', coordinates=self.coords) - assert self.has_cache('c', coordinates=self.coords2) - assert not self.has_cache('d', coordinates=self.coords) - - def test_del_key_coordinates(self): - self.node.put(0, 'a') - self.node.put(0, 
'b') - self.node.put(0, 'a', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords) - self.node.put(0, 'c', coordinates=self.coords2) - self.node.put(0, 'd', coordinates=self.coords) - - self.node.del_cache(key='a', cordinates=self.coords) - - assert self.has_cache('a') - assert not self.has_cache('a', coordinates=self.coords) - assert self.has_cache('b') - assert self.has_cache('c', coordinates=self.coords) - assert self.has_cache('c', coordinates=self.coords2) - assert self.has_cache('d', coordinates=self.coords) - -class TestDeprecatedMethods(object): - def setup_method(self): - self.paths_to_remove = [] - - def teardown_method(self): - for path in self.paths_to_remove: - try: - os.remove(path) - except: - pass - - def test_write(self): - n = Node() - c = podpac.Coordinates([0, 1], dims=['lat', 'lon']) - n._requested_coordinates = c # hack instead of evaluating the node - n._output = UnitsDataArray([0, 1]) - p = n.write('temp_test') - self.paths_to_remove.append(p) - - assert os.path.exists(p) - - def test_load(self): - c = podpac.Coordinates([0, 1], dims=['lat', 'lon']) - fn = 'temp_test' - - n1 = Node() - n1._output = UnitsDataArray([0, 1]) - n1._requested_coordinates = c # hack instead of evaluating the node - p1 = n1.write(fn) - self.paths_to_remove.append(p1) - - n2 = Node() - p2 = n2.load(fn, c) - - assert p1 == p2 - np.testing.assert_array_equal(n1._output.data, n2._output.data) + self.node.put_cache(1, 'test') + + self.node.put_cache(1, 'test', overwrite=True) + assert self.node.get_cache('test') == 1 + + def test_rem_all(self): + self.node.put_cache(0, 'a') + self.node.put_cache(0, 'b') + self.node.put_cache(0, 'a', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords2) + self.node.put_cache(0, 'd', coordinates=self.coords) + + self.node.rem_cache() + assert not self.node.has_cache('a') + assert not self.node.has_cache('b') + assert not 
self.node.has_cache('a', coordinates=self.coords) + assert not self.node.has_cache('c', coordinates=self.coords) + assert not self.node.has_cache('c', coordinates=self.coords2) + assert not self.node.has_cache('d', coordinates=self.coords) + + @pytest.mark.skip('BUG: Need to fix this.') + def test_rem_key(self): + self.node.put_cache(0, 'a') + self.node.put_cache(0, 'b') + self.node.put_cache(0, 'a', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords2) + self.node.put_cache(0, 'd', coordinates=self.coords) + + self.node.rem_cache(key='a') + + assert not self.node.has_cache('a') + assert not self.node.has_cache('a', coordinates=self.coords) + assert self.node.has_cache('b') + assert self.node.has_cache('c', coordinates=self.coords) + assert self.node.has_cache('c', coordinates=self.coords2) + assert self.node.has_cache('d', coordinates=self.coords) + + @pytest.mark.skip('BUG: Need to fix this.') + def test_rem_coordinates(self): + self.node.put_cache(0, 'a') + self.node.put_cache(0, 'b') + self.node.put_cache(0, 'a', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords2) + self.node.put_cache(0, 'd', coordinates=self.coords) + + self.node.rem_cache(coordinates=self.coords) + + assert self.node.has_cache('a') + assert not self.node.has_cache('a', coordinates=self.coords) + assert self.node.has_cache('b') + assert not self.node.has_cache('c', coordinates=self.coords) + assert self.node.has_cache('c', coordinates=self.coords2) + assert not self.node.has_cache('d', coordinates=self.coords) + + def test_rem_key_coordinates(self): + self.node.put_cache(0, 'a') + self.node.put_cache(0, 'b') + self.node.put_cache(0, 'a', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords) + self.node.put_cache(0, 'c', coordinates=self.coords2) + self.node.put_cache(0, 'd', coordinates=self.coords) + + 
self.node.rem_cache(key='a', coordinates=self.coords) + + assert self.node.has_cache('a') + assert not self.node.has_cache('a', coordinates=self.coords) + assert self.node.has_cache('b') + assert self.node.has_cache('c', coordinates=self.coords) + assert self.node.has_cache('c', coordinates=self.coords2) + assert self.node.has_cache('d', coordinates=self.coords) + +class TestCachePropertyDecorator(object): + def test_cache_property_decorator(self): + class Test(podpac.Node): + a = tl.Int(1).tag(attr=True) + b = tl.Int(1).tag(attr=True) + c = tl.Int(1) + d = tl.Int(1) + + @podpac.core.node.cached_property('a2', 'a') + def a2(self): + """a2 docstring""" + return self.a * 2 + + @podpac.core.node.cached_property('b2') + def b2(self): + """ b2 docstring """ + return self.b * 2 + + @podpac.core.node.cached_property('c2', 'c') + def c2(self): + """ c2 docstring """ + return self.c * 2 + + @podpac.core.node.cached_property('d2') + def d2(self): + """ d2 docstring """ + return self.d * 2 + + t = Test(cache_type='disk') + t2 = Test(cache_type='disk') + t.rem_cache() + t2.rem_cache() + + try: + t.get_cache('a2') + raise Exception("Cache should be cleared.") + except podpac.NodeException: + pass - def test_cache_dir(self): - n = Node() - assert isinstance(n.cache_dir, six.string_types) - assert n.cache_dir.endswith('Node') - assert 'cache' in n.cache_dir + assert t.a2() == 2 + assert t.b2() == 2 + assert t.c2() == 2 + assert t.d2() == 2 + assert t2.a2() == 2 + assert t2.b2() == 2 + assert t2.c2() == 2 + assert t2.d2() == 2 + + t.a = 2 + assert t.a2() == 4 + t.b = 2 + assert t.b2() == 4 # This happens because the node definition changed + t.rem_cache() + assert t.c2() == 2 # This forces the cache to update based on the new node definition + assert t.d2() == 2 # This forces the cache to update based on the new node definition + t.c = 2 + assert t.c2() == 4 # This happens because of depends + t.d = 2 + assert t.d2() == 2 # No depends, and doesn't have a tag + + # These should not 
change + assert t2.a2() == 2 + assert t2.b2() == 2 + assert t2.c2() == 2 + assert t2.d2() == 2 + + t2.a = 2 + assert t2.get_cache('a2') == 4 # This was cached by t + t2.b = 2 + assert t2.get_cache('c2') == 4 # This was cached by t + assert t2.get_cache('d2') == 2 # This was cached by t + + def test_cached_property_decorator_with_no_cache(self): + class Test(podpac.Node): + a = tl.Int(1).tag(attr=True) + b = tl.Int(1).tag(attr=True) + c = tl.Int(1) + d = tl.Int(1) + + @podpac.core.node.cached_property('a2', 'a') + def a2(self): + """a2 docstring""" + return self.a * 2 + + @podpac.core.node.cached_property('b2') + def b2(self): + """ b2 docstring """ + return self.b * 2 + + @podpac.core.node.cached_property('c2', 'c') + def c2(self): + """ c2 docstring """ + return self.c * 2 + + @podpac.core.node.cached_property('d2') + def d2(self): + """ d2 docstring """ + return self.d * 2 + + t = Test(cache_type=None) + t2 = Test(cache_type=None) + t.rem_cache() + t2.rem_cache() + + try: + t.get_cache('a2') + raise Exception("Cache should be cleared.") + except podpac.NodeException: + pass - def test_cache_path(self): - n = Node() - with pytest.raises(AttributeError): - n.cache_path('testfile') - with pytest.raises(AttributeError): - n.cache_obj('testObject', 'testFileName') - with pytest.raises(AttributeError): - n.load_cached_obj('testFileName') - - @pytest.mark.skip() - def test_clear_disk_cache(self): - class N(Node): - source = 'test' - - n = N() - with pytest.raises(AttributeError): - n.clear_disk_cache() - n.clear_disk_cache(all_cache=True) - with pytest.raises(AttributeError): - n.clear_disk_cache(node_cache=True) + assert t.a2() == 2 + assert t.b2() == 2 + assert t.c2() == 2 + assert t.d2() == 2 + assert t2.a2() == 2 + assert t2.b2() == 2 + assert t2.c2() == 2 + assert t2.d2() == 2 + + t.a = 2 + assert t.a2() == 4 + t.b = 2 + assert t.b2() == 4 # This happens because the node definition changed + t.rem_cache() + assert t.c2() == 2 # This forces the cache to update based 
on the new node definition + assert t.d2() == 2 # This forces the cache to update based on the new node definition + t.c = 2 + assert t.c2() == 4 # This happens because of depends + t.d = 2 + assert t.d2() == 4 # No caching here, so it SHOULD update + + # These should not change + assert t2.a2() == 2 + assert t2.b2() == 2 + assert t2.c2() == 2 + assert t2.d2() == 2 # TODO: remove this - this is currently a placeholder test until we actually have integration tests (pytest will exit with code 5 if no tests found) @pytest.mark.integration diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index 75358635a..cffb5a3b2 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -45,6 +45,8 @@ def isnat(a): from podpac.core import authentication from podpac.core.utils import common_doc from podpac.core.data.datasource import COMMON_DATA_DOC +from podpac.core.node import cached_property +from podpac.core.node import NodeException COMMON_DOC = COMMON_DATA_DOC.copy() COMMON_DOC.update( @@ -142,10 +144,10 @@ def _get_from_url(url, auth_session): raise RuntimeError('HTTP error: <%d>\n' % (r.status_code) + r.text[:256]) except requests.ConnectionError as e: - warnings.warn('WARNING: ' + str(e)) + warnings.warn('WARNING: cannot connect to {}:'.format(url) + str(e)) r = None except RuntimeError as e: - warnings.warn('WARNING: ' + str(e)) + warnings.warn('WARNING: cannot authenticate to {}:'.format(url) + str(e)) return r @@ -324,13 +326,10 @@ def lonkey(self): return SMAP_PRODUCT_MAP.sel(product=self.product, attr='lonkey').item().format(rdk=self.rootdatakey) @common_doc(COMMON_DOC) + @cached_property('native.coordinates') def get_native_coordinates(self): """{get_native_coordinates} """ - try: - return self.load_cached_obj('native.coordinates') - except: - pass times = self.get_available_times() ds = self.dataset lons = np.array(ds[self.lonkey][:, :]) @@ -340,7 +339,6 @@ def get_native_coordinates(self): lons = np.nanmean(lons, axis=0) lats = np.nanmean(lats, 
axis=1) coords = podpac.Coordinates([times, lats, lons], dims=['time', 'lat', 'lon']) - self.cache_obj(coords, 'native.coordinates') return coords def get_available_times(self): @@ -444,22 +442,18 @@ def _layerkey_default(self): return '{rdk}' + self.property @common_doc(COMMON_DOC) + @cached_property('native.coordinates') def get_native_coordinates(self): """{get_native_coordinates} """ - try: - coords = self.load_cached_obj('native.coordinates') - except: - ds = self.dataset - lons = np.array(ds[self.lonkey][:, :]) - lats = np.array(ds[self.latkey][:, :]) - lons[lons == self.nan_vals[0]] = np.nan - lats[lats == self.nan_vals[0]] = np.nan - lons = np.nanmean(lons, axis=0) - lats = np.nanmean(lats, axis=1) - coords = podpac.Coordinates([lats, lons], dims=['lat', 'lon']) - self.cache_obj(coords, 'native.coordinates') - + ds = self.dataset + lons = np.array(ds[self.lonkey][:, :]) + lats = np.array(ds[self.latkey][:, :]) + lons[lons == self.nan_vals[0]] = np.nan + lats[lats == self.nan_vals[0]] = np.nan + lons = np.nanmean(lons, axis=0) + lats = np.nanmean(lats, axis=1) + coords = podpac.Coordinates([lats, lons], dims=['lat', 'lon']) return coords class SMAPPorosity(SMAPProperties): @@ -521,6 +515,7 @@ class SMAPDateFolder(podpac.compositor.OrderedCompositor): auth_class = tl.Type(authentication.EarthDataSession) username = tl.Unicode(None, allow_none=True) password = tl.Unicode(None, allow_none=True) + cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True, default_value='disk') @tl.default('auth_session') def _auth_session_default(self): @@ -581,9 +576,12 @@ def sources_default(self): # break. Hence, try to get the new source everytime, unless data is offline, in which case rely on the cache. 
try: _, _, sources = self.get_available_coords_sources() - self.cache_obj(sources, 'sources') - except: # - sources = self.load_cached_obj('sources') + self.put_cache(sources, 'sources', overwrite=True) + except: # No internet or authentication error + try: + sources = self.get_cache('sources') + except NodeException as e: + raise NodeException("Connection or Authentication error, and no disk cache to fall back on for determining sources.") b = self.source + '/' @@ -621,15 +619,19 @@ def get_source_coordinates(self): try: times, latlon, _ = self.get_available_coords_sources() except: - return self.load_cached_obj('source.coordinates') + try: + return self.get_cache('source.coordinates') + except NodeException as e: + raise NodeException("Connection or Authentication error, and no disk cache to fall back on for determining sources.") if latlon is not None and latlon.size > 0: crds = podpac.Coordinates([[times, latlon[:, 0], latlon[:, 1]]], dims=['time_lat_lon']) else: crds = podpac.Coordinates([times], dims=['time']) - self.cache_obj(crds, 'source.coordinates') + self.put_cache(crds, 'source.coordinates', overwrite=True) return crds + @cached_property('shared.coordinates') def get_shared_coordinates(self): """Coordinates that are shared by all files in the folder. 
@@ -641,14 +643,8 @@ def get_shared_coordinates(self): if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: return None - try: - return self.load_cached_obj('shared.coordinates') - except: - pass - coords = copy.deepcopy(self.sources[0].native_coordinates) del coords._coords['time'] - self.cache_obj(coords, 'shared.coordinates') return coords def get_available_coords_sources(self): @@ -671,6 +667,7 @@ def get_available_coords_sources(self): url = self.source r = _get_from_url(url, self.auth_session) if r is None: + warnings.warn("WARNING: Could not contact {} to retrieve source coordinates".format(url)) return np.array([]), None, np.array([]) soup = bs4.BeautifulSoup(r.text, 'lxml') a = soup.find_all('a') @@ -767,6 +764,8 @@ def _detect_product_version(self): username = tl.Unicode(None, allow_none=True) password = tl.Unicode(None, allow_none=True) + cache_type = tl.Enum([None, 'disk', 'ram'], allow_none=True, default_value='disk') + @tl.default('auth_session') def _auth_session_default(self): session = self.auth_class(username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX) @@ -857,6 +856,7 @@ def get_available_times_dates(self): url = '/'.join([self.base_url, '%s.%03d' % (self.product, self.version)]) r = _get_from_url(url, self.auth_session) if r is None: + warnings.warn("WARNING: Could not contact {} to retrieve source coordinates".format(url)) return np.array([]), [] soup = bs4.BeautifulSoup(r.text, 'lxml') a = soup.find_all('a') @@ -872,6 +872,7 @@ def get_available_times_dates(self): dates.sort() return np.array(times), dates + @cached_property('shared.coordinates') def get_shared_coordinates(self): """Coordinates that are shared by all files in the SMAP product family. 
@@ -889,16 +890,10 @@ def get_shared_coordinates(self): if self.product in SMAP_INCOMPLETE_SOURCE_COORDINATES: return None - try: - return self.load_cached_obj('shared.coordinates') - except: - pass - coords = SMAPDateFolder(product=self.product, version=self.version, folder_date=self.get_available_times_dates()[1][0], auth_session=self.auth_session, ).shared_coordinates - self.cache_obj(coords, 'shared.coordinates') return coords def get_filename_coordinates_sources(self, bounds=None): @@ -926,12 +921,15 @@ def get_filename_coordinates_sources(self, bounds=None): If 'bounds' is not specified, the result is cached for faster future access. """ - if bounds is None: - try: - return (self.load_cached_obj('filename.coordinates'), - self.load_cached_obj('filename.sources')) - except: - pass + try: + crds, sources = (self.get_cache('filename.coordinates'), + self.get_cache('filename.sources')) + if bounds: + crds, I = crds.intersect(bounds, outer=True, return_indices=True) + sources = np.array(sources)[I].tolist() + return crds, sources + except NodeException: # Not in cache + pass if bounds is None: active_sources = self.sources @@ -949,8 +947,8 @@ def get_filename_coordinates_sources(self, bounds=None): #crds = crds + self.shared_coordinates sources = np.concatenate(sources) if bounds is None: - self.cache_obj(crds, 'filename.coordinates') - self.cache_obj(sources, 'filename.sources') + self.put_cache(crds, 'filename.coordinates') + self.put_cache(sources, 'filename.sources') return crds, sources @property From be9ed0189c8c6f7bc4d038b302dd681cded5d508 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 14:19:05 -0500 Subject: [PATCH 19/25] ENH: Making SMAP_BASE_URL checking lazy, so we don't get a warning at import. 
--- podpac/datalib/smap.py | 83 +++++++++++++++++++++++++----------------- 1 file changed, 50 insertions(+), 33 deletions(-) diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index cffb5a3b2..406804598 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -2,7 +2,7 @@ Attributes ---------- -SMAP_BASE_URL : str +SMAP_BASE_URL() : str Url to nsidc openDAP server SMAP_INCOMPLETE_SOURCE_COORDINATES : list List of products whose source coordinates are incomplete. This means any shared coordinates cannot be extracted @@ -196,29 +196,39 @@ def _infer_SMAP_product_version(product, base_url, auth_session): # Discover SMAP OpenDAP url from podpac s3 server SMAP_BASE_URL_FILE = os.path.join(os.path.dirname(__file__), 'nsidc_smap_opendap_url.txt') -SMAP_BASE_URL = 'https://n5eil01u.ecs.nsidc.org/opendap/SMAP' -try: - with open(SMAP_BASE_URL_FILE, 'r') as fid: - rf = fid.read() - if 'https://' in rf and 'nsidc.org' in rf: - SMAP_BASE_URL = rf -except Exception as e: - warnings.warn("Could not retrieve SMAP url from %s: " % (SMAP_BASE_URL_FILE) + str(e)) -try: - r = requests.get('https://s3.amazonaws.com/podpac-s3/settings/nsidc_smap_opendap_url.txt').text - if 'https://' in r and 'nsidc.org' in r: - if rf != r: - warnings.warn("Updating SMAP url from PODPAC S3 Server.") - SMAP_BASE_URL = r - try: - with open(SMAP_BASE_URL_FILE, 'w') as fid: - fid.write(r) - except Exception as e: - warnings.warn("Could not overwrite SMAP url update on disk:" + str(e)) -except Exception as e: - warnings.warn("Could not retrieve SMAP url from PODPAC S3 Server. Using default." 
+ str(e)) - -SMAP_BASE_URL_REGEX = re.compile(re.sub(r'\d', r'\\d', SMAP_BASE_URL.split('/')[2])) +_SMAP_BASE_URL = None +def SMAP_BASE_URL(): + global _SMAP_BASE_URL + if _SMAP_BASE_URL is not None: + return _SMAP_BASE_URL + BASE_URL = 'https://n5eil01u.ecs.nsidc.org/opendap/SMAP' + try: + with open(SMAP_BASE_URL_FILE, 'r') as fid: + rf = fid.read() + if 'https://' in rf and 'nsidc.org' in rf: + BASE_URL = rf + except Exception as e: + warnings.warn("Could not retrieve SMAP url from %s: " % (SMAP_BASE_URL_FILE) + str(e)) + try: + r = requests.get('https://s3.amazonaws.com/podpac-s3/settings/nsidc_smap_opendap_url.txt').text + if 'https://' in r and 'nsidc.org' in r: + if rf != r: + warnings.warn("Updating SMAP url from PODPAC S3 Server.") + BASE_URL = r + try: + with open(SMAP_BASE_URL_FILE, 'w') as fid: + fid.write(r) + except Exception as e: + warnings.warn("Could not overwrite SMAP url update on disk:" + str(e)) + except Exception as e: + warnings.warn("Could not retrieve SMAP url from PODPAC S3 Server. Using default." + str(e)) + _SMAP_BASE_URL = BASE_URL + return BASE_URL + +def SMAP_BASE_URL_REGEX(): + return re.compile(re.sub(r'\d', r'\\d', SMAP_BASE_URL().split('/')[2])) + + @common_doc(COMMON_DOC) class SMAPSource(datatype.PyDAP): """Accesses SMAP data given a specific openDAP URL. 
This is the base class giving access to SMAP data, and knows how @@ -246,10 +256,10 @@ class SMAPSource(datatype.PyDAP): @tl.default('auth_session') def _auth_session_default(self): session = self.auth_class( - username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX) + username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX()) # check url try: - session.get(SMAP_BASE_URL) + session.get(SMAP_BASE_URL()) except Exception as e: print("Unknown exception: ", e) return session @@ -408,7 +418,7 @@ class SMAPProperties(SMAPSource): 'cell_land_fraction', 'mwrtm_omega', 'mwrtm_soilcls', 'clsm_dzgt6', 'mwrtm_rghnrv', 'mwrtm_clay', 'mwrtm_sand' source : str, optional - Source OpenDAP url for SMAP properties. Default is (SMAP_BASE_URL + + Source OpenDAP url for SMAP properties. Default is (SMAP_BASE_URL() + 'SPL4SMLM{latest_version}/2015.03.31/' 'SMAP_L4_SM_lmc_00000000T000000_Vv{latest_version}.h5') """ @@ -417,8 +427,8 @@ class SMAPProperties(SMAPSource): source = tl.Unicode() @tl.default('source') def _property_source_default(self): - v = _infer_SMAP_product_version('SPL4SMLM', SMAP_BASE_URL, self.auth_session) - url = SMAP_BASE_URL + \ + v = _infer_SMAP_product_version('SPL4SMLM', SMAP_BASE_URL(), self.auth_session) + url = SMAP_BASE_URL() + \ '/SPL4SMLM.%03d/2015.03.31/' % (v) r = _get_from_url(url, self.auth_session) if not r: @@ -519,10 +529,14 @@ class SMAPDateFolder(podpac.compositor.OrderedCompositor): @tl.default('auth_session') def _auth_session_default(self): - session = self.auth_class(username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX) + session = self.auth_class(username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX()) return session - base_url = tl.Unicode(SMAP_BASE_URL).tag(attr=True) + base_url = tl.Unicode().tag(attr=True) + @tl.default('base_url') + def _base_url_default(self): + return SMAP_BASE_URL() + product = 
tl.Enum(SMAP_PRODUCT_MAP.coords['product'].data.tolist()).tag(attr=True) version = tl.Int(allow_none=True).tag(attr=True) @tl.default('version') @@ -749,7 +763,10 @@ class SMAP(podpac.compositor.OrderedCompositor): username : {username} """ - base_url = tl.Unicode(SMAP_BASE_URL).tag(attr=True) + base_url = tl.Unicode().tag(attr=True) + @tl.default('base_url') + def _base_url_default(self): + return SMAP_BASE_URL() product = tl.Enum(SMAP_PRODUCT_MAP.coords['product'].data.tolist(), default_value='SPL4SMAU').tag(attr=True) version = tl.Int(allow_none=True).tag(attr=True) @@ -768,7 +785,7 @@ def _detect_product_version(self): @tl.default('auth_session') def _auth_session_default(self): - session = self.auth_class(username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX) + session = self.auth_class(username=self.username, password=self.password, hostname_regex=SMAP_BASE_URL_REGEX()) return session layerkey = tl.Unicode() From ab36032cf232ded0ca10291962b75797a170e6a6 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 15:39:54 -0500 Subject: [PATCH 20/25] BUGFIX: cache_dir has been removed. So WCS and S3 nodes now have to use settings.CACHE_DIR. --- podpac/core/data/types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/podpac/core/data/types.py b/podpac/core/data/types.py index 1ef12af39..2838f8c3c 100644 --- a/podpac/core/data/types.py +++ b/podpac/core/data/types.py @@ -889,7 +889,7 @@ def get_data(self, coordinates, coordinates_index): output.data[i, ...] 
= dataset.read() except Exception as e: # Probably python 2 print(e) - tmppath = os.path.join(self.cache_dir, 'wcs_temp.tiff') + tmppath = os.path.join(podpac.settings.CACHE_DIR, 'wcs_temp.tiff') if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) @@ -955,7 +955,7 @@ def get_data(self, coordinates, coordinates_index): except Exception as e: # Probably python 2 print(e) tmppath = os.path.join( - self.cache_dir, 'wcs_temp.tiff') + podpac.settings.CACHE_DIR, 'wcs_temp.tiff') if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) open(tmppath, 'wb').write(content) @@ -1146,7 +1146,7 @@ def s3_data_default(self): #self.source.replace('\\', '').replace(':','')\ #.replace('/', '')) tmppath = os.path.join( - self.cache_dir, + podpac.settings.CACHE_DIR, self.source.replace('\\', '').replace(':', '').replace('/', '')) rootpath = os.path.split(tmppath)[0] From 19554a37c20e31a669881ad53d1d67ed43427e40 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 15:46:14 -0500 Subject: [PATCH 21/25] BUGFIX: Indentation error from last commit. --- podpac/core/data/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/podpac/core/data/types.py b/podpac/core/data/types.py index 2838f8c3c..e7d82251b 100644 --- a/podpac/core/data/types.py +++ b/podpac/core/data/types.py @@ -889,7 +889,7 @@ def get_data(self, coordinates, coordinates_index): output.data[i, ...] = dataset.read() except Exception as e: # Probably python 2 print(e) - tmppath = os.path.join(podpac.settings.CACHE_DIR, 'wcs_temp.tiff') + tmppath = os.path.join(podpac.settings.CACHE_DIR, 'wcs_temp.tiff') if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) From 96b2e2ae7110fc25d1520fec723a41171a62dc00 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Thu, 29 Nov 2018 16:37:38 -0500 Subject: [PATCH 22/25] BUGFIX: podpac.settings --> settings. 
Trying to fix things too rapidly before boarding airplane. :) --- podpac/core/data/types.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/podpac/core/data/types.py b/podpac/core/data/types.py index e7d82251b..44056aa65 100644 --- a/podpac/core/data/types.py +++ b/podpac/core/data/types.py @@ -889,7 +889,7 @@ def get_data(self, coordinates, coordinates_index): output.data[i, ...] = dataset.read() except Exception as e: # Probably python 2 print(e) - tmppath = os.path.join(podpac.settings.CACHE_DIR, 'wcs_temp.tiff') + tmppath = os.path.join(settings.CACHE_DIR, 'wcs_temp.tiff') if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) @@ -955,7 +955,7 @@ def get_data(self, coordinates, coordinates_index): except Exception as e: # Probably python 2 print(e) tmppath = os.path.join( - podpac.settings.CACHE_DIR, 'wcs_temp.tiff') + settings.CACHE_DIR, 'wcs_temp.tiff') if not os.path.exists(os.path.split(tmppath)[0]): os.makedirs(os.path.split(tmppath)[0]) open(tmppath, 'wb').write(content) @@ -1146,7 +1146,7 @@ def s3_data_default(self): #self.source.replace('\\', '').replace(':','')\ #.replace('/', '')) tmppath = os.path.join( - podpac.settings.CACHE_DIR, + settings.CACHE_DIR, self.source.replace('\\', '').replace(':', '').replace('/', '')) rootpath = os.path.split(tmppath)[0] From cdb67642b48e2e9d2a0c93242f5c2447bdf224c7 Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Fri, 30 Nov 2018 16:02:58 -0500 Subject: [PATCH 23/25] ENH: renaming decorator. Fixing filepath sanitization. 
--- podpac/core/cache/cache.py | 7 +++---- podpac/core/node.py | 36 +++++++++++++++++++----------------- podpac/datalib/smap.py | 10 +++++----- 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index cc31c4626..b2abaefbb 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -316,8 +316,7 @@ def cache_dir(self, node): basedir = self._root_dir_path subdir = str(node.__class__)[8:-2].split('.') dirs = [basedir] + subdir - # TODO: Use a function to sanitize directory name - return os.path.join(*dirs).replace('<', '').replace('>', '') + return self.cleanse_filename_str(os.path.join(*dirs)) def cache_filename(self, node, key, coordinates): pre = str(node.base_ref).replace('/', '_').replace('\\', '_').replace(':', '_') @@ -332,7 +331,7 @@ def cache_filename(self, node, key, coordinates): def cache_glob(self, node, key, coordinates): pre = '*' nKeY = 'nKeY{}'.format(self.hash_node(node)) - kKeY = 'kKeY*' if key == '*' else 'kKeY{}'.format(self.hash_key(key)) + kKeY = 'kKeY*' if key == '*' else 'kKeY{}'.format(self.cleanse_filename_str(self.hash_key(key))) cKeY = 'cKeY*' if coordinates == '*' else 'cKeY{}'.format(self.hash_coordinates(coordinates)) filename = '_'.join([pre, nKeY, kKeY, cKeY]) filename = filename + '.' 
+ self._extension @@ -342,7 +341,7 @@ def cache_path(self, node, key, coordinates): return os.path.join(self.cache_dir(node), self.cache_filename(node, key, coordinates)) def cleanse_filename_str(self, s): - s = s.replace('/', '_').replace('\\', '_').replace(':', '_') + s = s.replace('/', '_').replace('\\', '_').replace(':', '_').replace('<', '_').replace('_', '') s = s.replace('nKeY', 'xxxx').replace('kKeY', 'xxxx').replace('cKeY', 'xxxx') return s diff --git a/podpac/core/node.py b/podpac/core/node.py index 9fcfb4351..991090002 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -590,7 +590,7 @@ def wrapper(self, coordinates, output=None): return data return wrapper -def cached_property(key, depends=None, raise_no_cache_exception=False): +def cache_func(key, depends=None, raise_no_cache_exception=False): """ Decorating for caching a function's output based on a key. @@ -599,7 +599,7 @@ def cached_property(key, depends=None, raise_no_cache_exception=False): key: str Key used for caching. depends: str, list, traitlets.All (optional) - Default is None. Any traits that the cached property depends on. The cached_property may NOT + Default is None. Any traits that the cached property depends on. The cached function may NOT change the value of any of these dependencies (this will result in a RecursionError) raise_no_cache_exception: bool, optional Raises a NodeException if trying to put data to the cache, but no cache is available. 
@@ -614,34 +614,36 @@ def cached_property(key, depends=None, raise_no_cache_exception=False): Examples ---------- + >>> from podpac import Node + >>> from podpac.core.node import cache_func + >>> import traitlets as tl >>> class MyClass(Node): - value = 0 - @cached_func('native_coordinates') - def square_value(self): + value = tl.Int(0) + @cache_func('add') + def add_value(self): self.value += 1 return self.value - @cached_func('native_coordinates', depends='value') + @cache_func('square', depends='value') def square_value_depends(self): return self.value >>> n = MyClass() - >>> n.get_value() # The function as defined is called + >>> n.add_value() # The function as defined is called 1 - >>> n.get_value() # The function as defined is called again, since we have no caching specified + >>> n.add_value() # The function as defined is called again, since we have no caching specified 2 >>> n.cache_type = 'disk' - >>> n.get_value() # The function as defined is called again, and the value is stored to disk + >>> n.add_value() # The function as defined is called again, and the value is stored to disk 3 - >>> n.get_value() # The value is retrieved from disk, note the change in n.value is not captured + >>> n.add_value() # The value is retrieved from disk, note the change in n.value is not captured 3 - >>> n.get_value_depends() # The function as defined is called, and the value is stored to disk - 4 - >>> n.get_value_depends() # The value is retrieved from disk - 4 - + >>> n.square_value_depends() # The function as defined is called, and the value is stored to disk + 16 + >>> n.square_value_depends() # The value is retrieved from disk + 16 >>> n.value += 1 - >>> n.get_value_depends() # The function as defined is called, and the value is stored to disk. Note the change in n.value is captured. - 5 + >>> n.square_value_depends() # The function as defined is called, and the value is stored to disk. Note the change in n.value is captured. 
+ 25 """ # This is the actual decorator which will be evaluated and returns the wrapped function def cache_decorator(func): diff --git a/podpac/datalib/smap.py b/podpac/datalib/smap.py index 406804598..7e29ae440 100644 --- a/podpac/datalib/smap.py +++ b/podpac/datalib/smap.py @@ -45,7 +45,7 @@ def isnat(a): from podpac.core import authentication from podpac.core.utils import common_doc from podpac.core.data.datasource import COMMON_DATA_DOC -from podpac.core.node import cached_property +from podpac.core.node import cache_func from podpac.core.node import NodeException COMMON_DOC = COMMON_DATA_DOC.copy() @@ -336,7 +336,7 @@ def lonkey(self): return SMAP_PRODUCT_MAP.sel(product=self.product, attr='lonkey').item().format(rdk=self.rootdatakey) @common_doc(COMMON_DOC) - @cached_property('native.coordinates') + @cache_func('native.coordinates') def get_native_coordinates(self): """{get_native_coordinates} """ @@ -452,7 +452,7 @@ def _layerkey_default(self): return '{rdk}' + self.property @common_doc(COMMON_DOC) - @cached_property('native.coordinates') + @cache_func('native.coordinates') def get_native_coordinates(self): """{get_native_coordinates} """ @@ -645,7 +645,7 @@ def get_source_coordinates(self): self.put_cache(crds, 'source.coordinates', overwrite=True) return crds - @cached_property('shared.coordinates') + @cache_func('shared.coordinates') def get_shared_coordinates(self): """Coordinates that are shared by all files in the folder. @@ -889,7 +889,7 @@ def get_available_times_dates(self): dates.sort() return np.array(times), dates - @cached_property('shared.coordinates') + @cache_func('shared.coordinates') def get_shared_coordinates(self): """Coordinates that are shared by all files in the SMAP product family. From 093809e47a52ed9f677f9363706a7eabd2996c4d Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Fri, 30 Nov 2018 16:11:12 -0500 Subject: [PATCH 24/25] BUG: Fixing file path sanitization again. 
--- podpac/core/cache/cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/podpac/core/cache/cache.py b/podpac/core/cache/cache.py index b2abaefbb..0a0466c0f 100644 --- a/podpac/core/cache/cache.py +++ b/podpac/core/cache/cache.py @@ -316,7 +316,7 @@ def cache_dir(self, node): basedir = self._root_dir_path subdir = str(node.__class__)[8:-2].split('.') dirs = [basedir] + subdir - return self.cleanse_filename_str(os.path.join(*dirs)) + return (os.path.join(*dirs)).replace(':', '_').replace('<', '_').replace('>', '_') def cache_filename(self, node, key, coordinates): pre = str(node.base_ref).replace('/', '_').replace('\\', '_').replace(':', '_') @@ -341,7 +341,7 @@ def cache_path(self, node, key, coordinates): return os.path.join(self.cache_dir(node), self.cache_filename(node, key, coordinates)) def cleanse_filename_str(self, s): - s = s.replace('/', '_').replace('\\', '_').replace(':', '_').replace('<', '_').replace('_', '') + s = s.replace('/', '_').replace('\\', '_').replace(':', '_').replace('<', '_').replace('>', '_') s = s.replace('nKeY', 'xxxx').replace('kKeY', 'xxxx').replace('cKeY', 'xxxx') return s From 0cd62578063246e7d284f8c376ad1f0507ea56aa Mon Sep 17 00:00:00 2001 From: Matt Ueckermann Date: Fri, 30 Nov 2018 16:19:44 -0500 Subject: [PATCH 25/25] TESTFIX: Fixing unit tests with renamed decorator. 
--- podpac/core/test/test_node.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/podpac/core/test/test_node.py b/podpac/core/test/test_node.py index 6c9ec9627..84f80161f 100644 --- a/podpac/core/test/test_node.py +++ b/podpac/core/test/test_node.py @@ -364,22 +364,22 @@ class Test(podpac.Node): c = tl.Int(1) d = tl.Int(1) - @podpac.core.node.cached_property('a2', 'a') + @podpac.core.node.cache_func('a2', 'a') def a2(self): """a2 docstring""" return self.a * 2 - @podpac.core.node.cached_property('b2') + @podpac.core.node.cache_func('b2') def b2(self): """ b2 docstring """ return self.b * 2 - @podpac.core.node.cached_property('c2', 'c') + @podpac.core.node.cache_func('c2', 'c') def c2(self): """ c2 docstring """ return self.c * 2 - @podpac.core.node.cached_property('d2') + @podpac.core.node.cache_func('d2') def d2(self): """ d2 docstring """ return self.d * 2 @@ -428,29 +428,29 @@ def d2(self): assert t2.get_cache('c2') == 4 # This was cached by t assert t2.get_cache('d2') == 2 # This was cached by t - def test_cached_property_decorator_with_no_cache(self): + def test_cache_func_decorator_with_no_cache(self): class Test(podpac.Node): a = tl.Int(1).tag(attr=True) b = tl.Int(1).tag(attr=True) c = tl.Int(1) d = tl.Int(1) - @podpac.core.node.cached_property('a2', 'a') + @podpac.core.node.cache_func('a2', 'a') def a2(self): """a2 docstring""" return self.a * 2 - @podpac.core.node.cached_property('b2') + @podpac.core.node.cache_func('b2') def b2(self): """ b2 docstring """ return self.b * 2 - @podpac.core.node.cached_property('c2', 'c') + @podpac.core.node.cache_func('c2', 'c') def c2(self): """ c2 docstring """ return self.c * 2 - @podpac.core.node.cached_property('d2') + @podpac.core.node.cache_func('d2') def d2(self): """ d2 docstring """ return self.d * 2