diff --git a/.travis.yml b/.travis.yml index 633370796..bfe08f39d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ # Travis CI Podpac Build # Builds for python 2.x and python 3.x -# +# # Useful links: # https://docs.travis-ci.com/user/languages/python/ # https://conda.io/docs/user-guide/tasks/use-conda-with-travis-ci.html @@ -28,18 +28,18 @@ install: # install setup.py and dev extras - pip install coverage==4.5.4 - pip install .[devall] - + # Allow Python exec and eval functions for unit tests - mkdir /home/travis/.podpac - touch /home/travis/.podpac/ALLOW_PYTHON_EVAL_EXEC - + # cache pip dependencies for faster builds cache: pip # run unit tests -script: - - pytest --ci --cov=podpac podpac # run unit tests with coverage - - pytest --ci -m integration podpac # run integration tests +script: + - pytest --ci --cov=podpac podpac -v --color=yes -m "not integration" # run unit tests with coverage + # - pytest --ci -m integration podpac # run integration tests # run doctest - cd doc && ./test-docs.sh && cd .. diff --git a/CHANGELOG.md b/CHANGELOG.md index e34599f98..308c5f826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,76 @@ # Changelog +## 3.1.0 + +This release was in support of the GeoWATCH application. Bugs/features added were to support server deployment. + +### Features +* Added `OGR` datasource node for reading shapefiles +* `Compositers.multithreading`: For some compositors, it's important to actually evaluate the nodes in serial for performance reasons, regardless of the global multithreading setting. Now compositors user settings['MULTITHREADING'] by default, but `OrderedCompositors` always set this to `False`. In either case it can be overwritten on a node-by-node basis. +* `RasterioSource.prefer_overview_closest`: when selecting overview levels, we can either select the coarsest overview smaller than the eval coordinates OR we can select the overview with the closest step size to the eval coordinates (this may be coarser than the eval coordinates). Setting this attr to `True` will select the closest overview instead of the closest higher resolution overview. +* Improved speed of evaluations by eliminating unneccessary CRS validations +* Added `decode_cf` attribute to `Dataset` data source node +* Default interpolation can now be specificief application-wide through the `podpac.settings["DEFAULT_INTERPOLATION"]` setting +* Added `MockWCSClient` to `ogc.py` for WCS endpoints that do not implement `get_coverage`. This make it easy to turn PODPAC into a lightweight WCS server, and then use a PODPAC WCS client. +* Added `prefer_overviews` and `prefer_overviews_closest` attributes to `Rasterio` data source node. These attributes allow users to pull from the overviews directly for coarse requests. +* Added the point prober. This allows users to probe the values of an algorithm pipeline at a point. See `Node.probe` +* Added the `from_name_params` method to `Node`, allowing nodes to be created from the node name + additional parameters. +* Renamed `set_unsafe_eval` to `allow_unrestricted_code_execution` for a more descriptive name. +* Improved specification of enumerated colormaps in the `Style` +* Enabled saving to a geotiff memory file to support WCS calls + +## Bugfixes +* Fixed crs mismatch bug in `Reproject` node +* Fixed lat/lon ordering bug for different versions of WMS/WCS in `from_url` method of `Coordinates` +* Fixed bug in `Coordinates.transform` where `ArrayCoordinates` turned into `UniformCoordinates` for two CRS with linear mapping. +* Fixed bug in `DataSource` node where `get_data` returns coordinates that are different from the request (this happens in the case where raw data is returned) +* Fixed BBOX order specification error in `WCS` node, where different versions of WCS change the order of lat/lon. This is now handled correctly. +* Fixed a number of interpolation errors: + * `InterpolationMixin` will no longer cache internal evaluations which lead to strange caching errors + * Fixed selector bugs related to negative step sizes + * Fixed nearest neighbor interpolation bugs related to negative step sizes + * Fixed Selector uniform coordinates short-cut +* Fixed bug where `DataArray` attributes were dropped when doing basic math operations +* Fixed bug in `to_geotiff` export function (misplaced parenthesis) + +## 3.0.0 +Interpolation refactoring. Interpolation now lives as an Algorithm Node. As such, +interpolation can exist in any part of a pipeline, and even multiple times. As +part of this improvement, we also implemented "Selectors" which subselect data +based on the interpolation method specified BEFORE data is pulled from remote +servers. + +Because this refactor changed the interface somewhat, we bumped the major version number. + +The MAJOR change with the PODPAC functionality is that now some Nodes may return DIFFERENT (not interpolated) coordinates than the eval coordinates. + +### Features +* Added `Interpolation` Node and `InterpolationMixin` to restore backwards compatibility with most nodes. +* Replace WCS node with a new version that uses owslib under the hood. Also added authentiation support. +* Added SoilGrids WCS data sources +* Added an "Xarray" interpolator, which uses `xarray`'s interpolation methods. This now allows linear project for time, for example. +* Interpolators will now throw warning if the user specifies an interpolation parameter which is not used. +* Improved interpolation documentation +* Added "Autozoom" functionality for TerrainTiles datasource +* Added `Compositor` nodes that combine multiple files/tiles of a single datasource BEFORE interpolation +* Removed SMAP PyDAP datalib -- it was always unstable whereas the EGI version usually works +* Improved Rasterio node -- it now read datasources directly using Rasterio instead of going through s3fs. + +### Bugfixes +* Can now clear ram cache before cache is eliminated +* Fixed #303, UnitsDataArray deserialization +* Removed support for "numpy" return type in Algorithm nodes, since coordinates can now be altered in Algorithm Nodes +* Fixed styling and plugin information is being set 7aef43b5a +* Fixed some floating point rounding issues at tile edges 8ac834d4 +* Fixed Coordinates.from_url to work correctly with different versions of OCG WMS call (and possible WCS calls, but the WCS documentation and my reference servers disagree...) + ## 2.3.0 -### Introduction +### Introduction Adding subdataset support for hdf4 data sources (i.e. downloaded MODIS netcdf file), wrapping SoilScape data, and adding -expiration to cache. +expiration to cache. -This release also drops Python 3.5 support. +This release also drops Python 3.5 support. ### Features * Subdataset support in Rasterio Node, see #410 @@ -22,7 +86,7 @@ This release also drops Python 3.5 support. ### Bug Fixes * Fixed floating point errors on selection of data subset (short circuit optimization to avoid unnecessary interpolation) * Fixed bug in cosmos_stations.latlon_from_label giving the wrong latlon for a label -* Fixing compositor to update interpolation of sources automatically (and deleting cached definitions). +* Fixing compositor to update interpolation of sources automatically (and deleting cached definitions). * Also making cached node definitions easier to remove -- no longer caching node.json, node.json_pretty and node.hash ## 2.2.0 @@ -35,27 +99,27 @@ Wrapping Landsat8, Sentinel2, and MODIS data and improving interpolation. * Added `datalib.modis_pds` which wraps MODIS products ["MCD43A4.006", "MOD09GA.006", "MYD09GA.006", "MOD09GQ.006", "MYD09GQ.006"] * Added settings['AWS_REQUESTER_PAYS'] and `authentication.S3Mixing.aws_requester_pays` attribute to support Sentinel2 data * Added `issubset` method to Coordinates which allows users to test if a coordinate is a subset of another one -* Added environmental variables in Lambda function deployment allowing users to specify the location of additional -dependencies (`FUNCTION_DEPENDENCIES_KEY`) and settings (`SETTINGS`). This was in support the WMS service. +* Added environmental variables in Lambda function deployment allowing users to specify the location of additional +dependencies (`FUNCTION_DEPENDENCIES_KEY`) and settings (`SETTINGS`). This was in support the WMS service. * Intake nodes can now filter inputs by additional data columns for .csv files / pandas dataframes by using the pandas -`query` method. +`query` method. * Added documentation on `Interpolation` and `Wrapping Datasets` ### Bug Fixes -* Added `dims` attributes to `Compositor` nodes which indicates the dimensions that sources are expected to have. This +* Added `dims` attributes to `Compositor` nodes which indicates the dimensions that sources are expected to have. This fixes a bug where `Nodes` throw and error if Coordinates contain extra dimensions when the `Compositor` sources are missing those dimensions. -* `COSMOSStations` will no longer fail for sites with no data or one data point. These sites are now automatically filtered. +* `COSMOSStations` will no longer fail for sites with no data or one data point. These sites are now automatically filtered. * Fixed `core.data.file_source` closing files prematurely due to using context managers * Fixed heterogenous interpolation (where lat/lon uses a different interpolator than time, for example) -* `datalib.TerrainTiles` now accesses S3 anonymously by default. Interpolation specified at the compositor level are -also now passed down to the sources. +* `datalib.TerrainTiles` now accesses S3 anonymously by default. Interpolation specified at the compositor level are +also now passed down to the sources. ### Breaking changes * Fixed `core.algorithm.signal.py` and in the process removed `SpatialConvolution` and `TemporalConvolutions`. Users now -have to label the dimensions of the kernel -- which prevents results from being modified if the eval coordinates are -transposed. This was a major bug in the `Convolution` node, and the new change obviates the need for the removed Nodes, -but it may break some pipelines. +have to label the dimensions of the kernel -- which prevents results from being modified if the eval coordinates are +transposed. This was a major bug in the `Convolution` node, and the new change obviates the need for the removed Nodes, +but it may break some pipelines. ## 2.1.0 @@ -82,7 +146,7 @@ Fixing some bugs associated with AWS evaluation and the drought-monitor applicat * Added `MODIS` datasource `datalib.modis_pds` * Added `datalib.weathercitizen` to retrieve weathercitizen data * Added `datalib.cosmos_stations` to retrieve soil moisture data from the stationary COSMOS soil moisture network -* Added `algorithm.ResampleReduce`, which allows users to coarsen a dataset based on a reduce operation (such as mean, max, etc.). +* Added `algorithm.ResampleReduce`, which allows users to coarsen a dataset based on a reduce operation (such as mean, max, etc.). * Added the `managers.parallel` submodule that enables parallel computation with PODPAC in a multi-threaded, multi-process, or multi-AWS-Lambda-function way * Added the `managers.multi_process` submodule that enables PODPAC nodes to be run in another process. * Added the `compositor.UniformTileCompositor` and `compositor.UniformTileMixin` to enable compositing of data sources BEFORE harmonization (so that interpolation can happen across data sources with the same coordinate systems) @@ -97,7 +161,7 @@ Fixing some bugs associated with AWS evaluation and the drought-monitor applicat * Added podpac's version to pipeline definitions ### Bug Fixes -* Fixed `algorithm.GroupReduce` to accept `dayofyear`, `weekofyear`, `season`, and `month`. It also now returns the time coordinate in one of these units. +* Fixed `algorithm.GroupReduce` to accept `dayofyear`, `weekofyear`, `season`, and `month`. It also now returns the time coordinate in one of these units. * Implemented a circular dependency check to avoid infinite recursion and locking up due to cache accessing. This change also defined the `NodeDefinitionError` exception. * Fixed the `UnitsDataArray.to_format` function's `zarr_part` format to work propertly with parallel computations * Added the `[algorithm]` dependencies as part of the AWS Lambda function build -- previously the `numexpr` Python package was missing @@ -112,7 +176,7 @@ Fixing some bugs associated with AWS evaluation and the drought-monitor applicat * Removed `datalib.airmoss` -- it was no longer working! ### Maintenance -* Refactored the way PODPAC keeps track of `Node` definition. Most all of it is now handled by the base class, previously `DataSource`, `Algorithm`, and `Compositor` had to implement specialized functions. +* Refactored the way PODPAC keeps track of `Node` definition. Most all of it is now handled by the base class, previously `DataSource`, `Algorithm`, and `Compositor` had to implement specialized functions. * Refactored `datalib` nodes to prefer using the new `cached_property` decorator instead of `defaults` which were causing severe circular dependencies * Refactored `DataSource` nodes that access files on S3 to use a common `Mixin` * Refactored authentication to use more consistent approach across the library @@ -125,7 +189,7 @@ The purpose of this release was to make the software more robust and to improve ### Features -* Algorithm arrays can now be multi-threaded. This allows an algorithm with multiple S3 data sources to fetch the data +* Algorithm arrays can now be multi-threaded. This allows an algorithm with multiple S3 data sources to fetch the data in parallel before doing the computation, speeding up the process. See #343 * Improvements to AWS interface. See #336 * Added budgeting / billing capability to manage AWS resources. See #361 @@ -150,7 +214,7 @@ The purpose of this release was to make the software more robust and to improve * CSV.lon_col --> lon_key * CSV.time_col --> time_key * CSV.alt_col --> alt_key - + ## 1.2.0 @@ -158,7 +222,7 @@ The purpose of this release was to make the software more robust and to improve The purpose of this release was to develop a short course for AMS2020. A major feature of this release is automated creation of the PODPAC Lambda function. As part of this we implemented a few more additional -features, and fixed a number of bugs. +features, and fixed a number of bugs. ### Features diff --git a/dist/aws/handler.py b/dist/aws/handler.py index 8e1aa72d1..c6bd924b0 100644 --- a/dist/aws/handler.py +++ b/dist/aws/handler.py @@ -16,12 +16,12 @@ def default_pipeline(pipeline=None): """Get default pipeline definiton, merging with input pipline if supplied - + Parameters ---------- pipeline : dict, optional Input pipline. Will fill in any missing defaults. - + Returns ------- dict @@ -46,20 +46,20 @@ def default_pipeline(pipeline=None): # overwrite certain settings so that the function doesn't fail pipeline["settings"]["ROOT_PATH"] = "/tmp" - pipeline["settings"]["LOG_FILE_PATH"] = "/tmp" + pipeline["settings"]["LOG_FILE_PATH"] = "/tmp/podpac.log" return pipeline def get_trigger(event): - """ + """ Helper method to determine the trigger for the lambda invocation - + Parameters ---------- event : dict Event dict from AWS. See [TODO: add link reference] - + Returns ------- str @@ -76,7 +76,7 @@ def get_trigger(event): def parse_event(trigger, event): """Parse pipeline, settings, and output details from event depending on trigger - + Parameters ---------- trigger : str @@ -86,7 +86,7 @@ def parse_event(trigger, event): """ if trigger == "eval": - print ("Triggered by Invoke") + print("Triggered by Invoke") # event is the pipeline, provide consistent pipeline defaults pipeline = default_pipeline(event) @@ -94,7 +94,7 @@ def parse_event(trigger, event): return pipeline elif trigger == "S3": - print ("Triggered from S3") + print("Triggered from S3") # get boto s3 client s3 = boto3.client("s3") @@ -133,7 +133,7 @@ def parse_event(trigger, event): return pipeline elif trigger == "APIGateway": - print ("Triggered from API Gateway") + print("Triggered from API Gateway") pipeline = default_pipeline() pipeline["url"] = event["queryStringParameters"] @@ -158,8 +158,8 @@ def parse_event(trigger, event): # If we get here, the api settings were loaded pipeline["settings"] = {**pipeline["settings"], **api_settings} except Exception as e: - print ("Got an exception when attempting to load api settings: ", e) - print (pipeline) + print("Got an exception when attempting to load api settings: ", e) + print(pipeline) # handle OUTPUT in query parameters elif param == "output": @@ -187,7 +187,7 @@ def parse_event(trigger, event): def handler(event, context): """Lambda function handler - + Parameters ---------- event : dict @@ -199,7 +199,7 @@ def handler(event, context): ret_pipeline : bool, optional Description """ - print (event) + print(event) # Add /tmp/ path to handle python path for dependencies sys.path.append("/tmp/") @@ -229,21 +229,21 @@ def handler(event, context): else: dependencies = "podpac_deps_{}.zip".format( os.environ.get("PODPAC_VERSION", pipeline["settings"].get("PODPAC_VERSION")) - ) - if 'None' in dependencies: - dependencies = 'podpac_deps.zip' # Development version of podpac + ) + if "None" in dependencies: + dependencies = "podpac_deps.zip" # Development version of podpac # this should be equivalent to version.semver() # Check to see if this function is "hot", in which case the dependencies have already been downloaded and are # available for use right away. if os.path.exists("/tmp/scipy"): - print ( + print( "Scipy has been detected in the /tmp/ directory. Assuming this function is hot, dependencies will" " not be downloaded." ) else: # Download dependencies from specific bucket/object - print ("Downloading and extracting dependencies from {} {}".format(bucket, dependencies)) + print("Downloading and extracting dependencies from {} {}".format(bucket, dependencies)) s3 = boto3.client("s3") s3.download_file(bucket, dependencies, "/tmp/" + dependencies) subprocess.call(["unzip", "/tmp/" + dependencies, "-d", "/tmp"]) @@ -266,7 +266,7 @@ def handler(event, context): # update podpac settings with inputs from the trigger settings.update(json.loads(os.environ.get("SETTINGS", "{}"))) settings.update(pipeline["settings"]) - + # build the Node and Coordinates if trigger in ("eval", "S3"): node = Node.from_definition(pipeline["pipeline"]) @@ -302,7 +302,7 @@ def handler(event, context): try: json.dumps(body) except Exception as e: - print ("Output body is not serializable, attempting to decode.") + print("Output body is not serializable, attempting to decode.") body = body.decode() return { diff --git a/doc/source/api.rst b/doc/source/api.rst index 8ec1eb528..4bdcc7c1f 100644 --- a/doc/source/api.rst +++ b/doc/source/api.rst @@ -72,6 +72,7 @@ Generic data source wrappers podpac.data.CSV podpac.data.Dataset podpac.data.H5PY + podpac.data.OGR podpac.data.PyDAP podpac.data.Rasterio podpac.data.WCS diff --git a/doc/source/install.md b/doc/source/install.md index 4f7f4d8c7..4e1eb30b3 100644 --- a/doc/source/install.md +++ b/doc/source/install.md @@ -20,11 +20,11 @@ Confirm you have the required dependencies installed on your computer: ### Environment -If using Anaconda Python, create a PODPAC dedicated Anconda environment: +If using Anaconda Python (recommended for Windows), create a PODPAC dedicated Anconda environment: ``` -# create environment with all `anaconda` packages -$ conda create -n podpac python=3 anaconda +# create environment +$ conda create -n podpac python=3 # activate environment $ conda activate podpac @@ -33,16 +33,47 @@ $ conda activate podpac If using a non-Anaconda Python distribution, create a PODPAC dedicated virtual environment: ``` -# create environment in +# create environment in using virtualenv $ python3 -m venv # activate environment $ source /bin/activate + +# Create environment named `podpac` using pyenv +$ pyenv virtualenv 3.8.6 podpac +$ pyenv activate podpac ``` ### Install -After activating the virtual environment, install using `pip` with one of the following commands: +#### Windows Pre-Installation Instructions with Anaconda +Some packages are simpler to install through `conda` compared to `pip` on Windows. NOTE: These +instructions tend to change based on the specific version of `conda`. Substituting which packages +are installed from `conda-forge` vs the `default` `conda` channel can usually result in a +functional installation. + +For a full PODPAC installation pre-install the +following packages in a clean `conda` environment: + +```bash +# Install core dependencies +> conda install matplotlib>=2.1 numpy>=1.14 scipy>=1.0 traitlets>=4.3 xarray>=0.10 ipython psutil requests>=2.18 owslib beautifulsoup4>=4.6 h5py>=2.9 lxml>=4.2 zarr>=2.3 intake>=0.5 boto3>=1.4 s3fs>=0.2 numexpr>=2.6 jupyterlab ipywidgets nodejs +> conda install pyproj>=2.2 rasterio>=1.0 ipympl ipyleaflet -c conda-forge + +# Set up jupyter lab extensions +> jupyter labextension install @jupyter-widgets/jupyterlab-manager +> jupyter labextension install jupyter-leaflet +> jupyter labextension install jupyter-matplotlib +> jupyter nbextension enable --py widgetsnbextension +> jupyter lab build + +# clean conda environment (optional) +> conda clean -a -y +``` + +#### All Operating systems + +After activating the environment, install using `pip` with one of the following commands: ``` $ pip install podpac # base installation @@ -163,12 +194,12 @@ $ git checkout -b develop origin/develop # latest stable version From the root of the git repository, install using `pip` with one of the following commands: ``` -$ pip install . # base installation -$ pip install .[datatype] # install podpac and optional data handling dependencies -$ pip install .[notebook] # install podpac and optional notebook dependencies -$ pip install .[aws] # install podpac and optional aws dependencies -$ pip install .[algorithm] # install podpac and optional algorithm dependencies -$ pip install .[all] # install podpac and all optional dependencies +$ pip install -e . # base installation +$ pip install -e .[datatype] # install podpac and optional data handling dependencies +$ pip install -e .[notebook] # install podpac and optional notebook dependencies +$ pip install -e .[aws] # install podpac and optional aws dependencies +$ pip install -e .[algorithm] # install podpac and optional algorithm dependencies +$ pip install -e .[all] # install podpac and all optional dependencies ``` See [Optional Dependencies](dependencies.html#optional-dependencies) more information on optional PODPAC dependencies. diff --git a/doc/source/interpolation.md b/doc/source/interpolation.md index 2990d0c0a..dc757d9d6 100644 --- a/doc/source/interpolation.md +++ b/doc/source/interpolation.md @@ -3,7 +3,9 @@ ## Description PODPAC allows users to specify various different interpolation schemes for nodes with -increased granularity, and even lets users write their own interpolators. +increased granularity, and even lets users write their own interpolators. By default +PODPAC uses the `podpac.settings["DEFAULT_INTERPOLATION"] == "nearest"`, which may +be modified by users. Relevant example notebooks include: * [Advanced Interpolation](https://github.com/creare-com/podpac-examples/blob/master/notebooks/4-advanced/interpolation.ipynb) @@ -11,8 +13,8 @@ Relevant example notebooks include: * [Drought Monitor Data Access Harmonization Processing](https://github.com/creare-com/podpac-examples/blob/master/notebooks/examples/drought-monitor/03-data-access-harmonization-processing.ipynb) ## Examples -Consider a `DataSource` with `lat`, `lon`, `time` coordinates that we will instantiate as: -`node = DataSource(..., interpolation=interpolation)` +Consider a `DataSource` with `lat`, `lon`, `time` coordinates that we will instantiate as: +`node = DataSource(..., interpolation=interpolation)` `interpolation` can be specified ... @@ -20,7 +22,7 @@ Consider a `DataSource` with `lat`, `lon`, `time` coordinates that we will insta `interpolation='nearest'` * **Descripition**: All dimensions are interpolated using nearest neighbor interpolation. This is the default, but available options can be found here: `podpac.core.interpolation.interpolation.INTERPOLATION_METHODS` . -* **Details**: PODPAC will automatically select appropriate interpolators based on the source coordinates and eval coordinates. Default interpolator orders can be found in `podpac.core.interpolation.interpolation.INTERPOLATION_METHODS_DICT` +* **Details**: PODPAC will automatically select appropriate interpolators based on the source coordinates and eval coordinates. Default interpolator orders can be found in `podpac.core.interpolation.interpolation.INTERPOLATION_METHODS_DICT` ### ...as a dictionary @@ -35,7 +37,7 @@ interpolation = { } ``` * **Descripition**: All dimensions are interpolated using nearest neighbor interpolation, and the type of interpolators are tried in the order specified. For applicable interpolators, the specified parameters will be used. -* **Details**: PODPAC loops through the `interpolators` list, checking if the interpolator is able to interpolate between the evaluated and source coordinates. The first capable interpolator available will be used. +* **Details**: PODPAC loops through the `interpolators` list, checking if the interpolator is able to interpolate between the evaluated and source coordinates. The first capable interpolator available will be used. ### ...as a list @@ -52,8 +54,8 @@ interpolation = [ ] ``` -* **Descripition**: The dimensions listed in the `'dims'` list will used the specified method. These dictionaries can also specify the same field shown in the previous section. -* **Details**: PODPAC loops through the `interpolation` list, using the settings specified for each dimension independently. +* **Descripition**: The dimensions listed in the `'dims'` list will used the specified method. These dictionaries can also specify the same field shown in the previous section. +* **Details**: PODPAC loops through the `interpolation` list, using the settings specified for each dimension independently. **NOTE! Specifying the interpolation as a list also control the ORDER of interpolation.** The first item in the list will be interpolated first. In this case, `lat`/`lon` will be bilinearly interpolated BEFORE `time` is nearest-neighbor interpolated. @@ -63,40 +65,40 @@ The list of available interpolators are as follows: * `NearestNeighbor`: A custom implementation based on `scipy.cKDtree`, which handles nearly any combination of source and destination coordinates * `XarrayInterpolator`: A light-weight wrapper around `xarray`'s `DataArray.interp` method, which is itself a wrapper around `scipy` interpolation functions, but with a clean `xarray` interface * `RasterioInterpolator`: A wrapper around `rasterio`'s interpolation/reprojection routines. Appropriate for grid-to-grid interpolation. -* `ScipyGrid`: An optimized implementation for `grid` sources that uses `scipy`'s `RegularGridInterpolator`, or `RectBivariateSplit` interpolator depending on the method. +* `ScipyGrid`: An optimized implementation for `grid` sources that uses `scipy`'s `RegularGridInterpolator`, or `RectBivariateSplit` interpolator depending on the method. * `ScipyPoint`: An implementation based on `scipy.KDtree` capable of `nearest` interpolation for `point` sources * `NearestPreview`: An approximate nearest-neighbor interpolator useful for rapidly viewing large files -The default order for these interpolators can be found in `podpac.data.INTERPOLATORS`. +The default order for these interpolators can be found in `podpac.data.INTERPOLATORS`. ### NearestNeighbor Since this is the most general of the interpolators, this section deals with the available parameters and settings for the `NearestNeighbor` interpolator. #### Parameters -The following parameters can be set by specifying the interpolation as a dictionary or a list, as described above. +The following parameters can be set by specifying the interpolation as a dictionary or a list, as described above. * `respect_bounds` : `bool` - * Default is `True`. If `True`, any requested dimension OUTSIDE of the bounds will be interpolated as `nan`. + * Default is `True`. If `True`, any requested dimension OUTSIDE of the bounds will be interpolated as `nan`. Otherwise, any point outside the bounds will have the value of the nearest neighboring point * `remove_nan` : `bool` * Default is `False`. If `True`, `nan`'s in the source dataset will NOT be interpolated. This can be used if a value for the function is needed at every point of the request. It is not helpful when computing statistics, where `nan` values will be explicitly - ignored. In that case, if `remove_nan` is `True`, `nan` values will take on the values of neighbors, skewing the statistical result. + ignored. In that case, if `remove_nan` is `True`, `nan` values will take on the values of neighbors, skewing the statistical result. * `*_tolerance` : `float`, where `*` in ["spatial", "time", "alt"] * Default is `inf`. Maximum distance to the nearest coordinate to be interpolated. Corresponds to the unit of the `*` dimension. * `*_scale` : `float`, where `*` in ["spatial", "time", "alt"] * Default is `1`. This only applies when the source has stacked dimensions with different units. The `*_scale` - defines the factor that the coordinates will be scaled by (coordinates are divided by `*_scale`) to output - a valid distance for the combined set of dimensions. + defines the factor that the coordinates will be scaled by (coordinates are divided by `*_scale`) to output + a valid distance for the combined set of dimensions. For example, when "lat, lon, and alt" dimensions are stacked, ["lat", "lon"] are in degrees - and "alt" is in feet, the `*_scale` parameters should be set so that - `|| [dlat / spatial_scale, dlon / spatial_scale, dalt / alt_scale] ||` results in a reasonable distance. + and "alt" is in feet, the `*_scale` parameters should be set so that + `|| [dlat / spatial_scale, dlon / spatial_scale, dalt / alt_scale] ||` results in a reasonable distance. * `use_selector` : `bool` - * Default is `True`. If `True`, a subset of the coordinates will be selected BEFORE the data of a dataset is retrieved. This + * Default is `True`. If `True`, a subset of the coordinates will be selected BEFORE the data of a dataset is retrieved. This reduces the number of data retrievals needed for large datasets. In cases where `remove_nan` = `True`, the selector may select only `nan` points, in which case the interpolation fails to produce non-`nan` data. This usually happens when requesting a single - point from a dataset that contains `nan`s. As such, in these cases set `use_selector` = `False` to get a non-`nan` value. + point from a dataset that contains `nan`s. As such, in these cases set `use_selector` = `False` to get a non-`nan` value. #### Advanced NearestNeighbor Interpolation Examples * Only interpolate points that are within `1` of the source data lat/lon locations @@ -151,15 +153,15 @@ interpolation = [ "remove_nan": True, }, "dims": ["time"] - }, + }, { "method": "nearest", "dims": ["lat", "lon", "alt"] - }, + }, ] ``` ## Notes and Caveats -While the API is well developed, all conceivable functionality is not. For example, while we can interpolate gridded data to point data, point data to grid data interpolation is not as well supported, and there may be errors or unexpected results. Advanced users can develop their own interpolators, but this is not currently well-documented. +While the API is well developed, all conceivable functionality is not. For example, while we can interpolate gridded data to point data, point data to grid data interpolation is not as well supported, and there may be errors or unexpected results. Advanced users can develop their own interpolators, but this is not currently well-documented. -**Gotcha**: Parameters for a specific interpolator may be ignored if a different interpolator is automatically selected. These ignored parameters are logged as warnings. +**Gotcha**: Parameters for a specific interpolator may be ignored if a different interpolator is automatically selected. These ignored parameters are logged as warnings. diff --git a/podpac/conftest.py b/podpac/conftest.py index 18fd62a65..4d413bd57 100644 --- a/podpac/conftest.py +++ b/podpac/conftest.py @@ -2,6 +2,7 @@ Test Setup """ +import copy import pytest from podpac.core.settings import settings @@ -50,12 +51,23 @@ def pytest_unconfigure(config): pass -original_default_cache = settings["DEFAULT_CACHE"] +original_settings = None def pytest_sessionstart(session): + # save settings + global original_settings + original_settings = copy.copy(settings) + + # set the default cache settings["DEFAULT_CACHE"] = [] def pytest_sessionfinish(session, exitstatus): - settings["DEFAULT_CACHE"] = original_default_cache + # restore settings + keys = list(settings.keys()) + for key in keys: + if key in original_settings: + settings[key] = original_settings[key] + else: + del settings[key] diff --git a/podpac/core/algorithm/reprojection.py b/podpac/core/algorithm/reprojection.py index 5ed33ca76..7a6cb82fa 100644 --- a/podpac/core/algorithm/reprojection.py +++ b/podpac/core/algorithm/reprojection.py @@ -1,5 +1,5 @@ """ -Reprojection Algorithm Node +Reprojection Algorithm Node """ from __future__ import division, unicode_literals, print_function, absolute_import @@ -79,7 +79,11 @@ def reprojection_coordinates(self): def _source_eval(self, coordinates, selector, output=None): coords = self.reprojection_coordinates.intersect(coordinates, outer=True) - coords = merge_dims([coords, coordinates.drop(self.reproject_dims or self.reprojection_coordinates.dims)]) + extra_eval_coords = coordinates.drop(self.reproject_dims or self.reprojection_coordinates.dims) + if coords.crs != coordinates.crs: + # Better to evaluate in reproject coordinate crs than eval crs for next step of interpolation + extra_eval_coords = extra_eval_coords.transform(coords.crs) + coords = merge_dims([coords, extra_eval_coords]) return self.source.eval(coords, output=output, _selector=selector) @property diff --git a/podpac/core/algorithm/test/test_reprojection.py b/podpac/core/algorithm/test/test_reprojection.py index 19a3bedec..d40c7a0c6 100644 --- a/podpac/core/algorithm/test/test_reprojection.py +++ b/podpac/core/algorithm/test/test_reprojection.py @@ -3,7 +3,7 @@ import pytest import numpy as np -from numpy.testing import assert_equal, assert_array_equal +from numpy.testing import assert_equal, assert_array_equal, assert_almost_equal import traitlets as tl import podpac @@ -19,6 +19,11 @@ class TestReprojection(object): source_coarse = Array( source=[[0, 4, 8], [36, 40, 44], [72, 76, 80]], coordinates=coarse_coords, interpolation="bilinear" ) + source_coarse2 = Array( + source=[[0, 4, 8], [36, 40, 44], [72, 76, 80]], + coordinates=coarse_coords.transform("EPSG:3857"), + interpolation="bilinear", + ) def test_reprojection_Coordinates(self): reproject = Reproject(source=self.source, interpolation="bilinear", coordinates=self.coarse_coords) @@ -63,3 +68,32 @@ def test_reprojection_source_str(self): node = podpac.Node.from_json(reproject.json) o3 = node.eval(self.coarse_coords) assert_array_equal(o1.data, o3.data) + + def test_reprojection_Coordinates_crs(self): + # same eval and source but different reproject + reproject = Reproject( + source=self.source, + interpolation={"method": "bilinear", "params": {"fill_value": "extrapolate"}}, + coordinates=self.coarse_coords.transform("EPSG:3857"), + ) + o1 = reproject.eval(self.source_coords) + # We have to use a second source here because the reprojected source + # gets interpreted as having it's source coordinates in EPSG:3857 + # and when being subsampled, there's a warping effect... + o2 = self.source_coarse2.eval(self.source_coords) + assert_almost_equal(o1.data, o2.data, decimal=13) + + node = podpac.Node.from_json(reproject.json) + o3 = node.eval(self.source_coords) + assert_array_equal(o1.data, o3.data) + + # same eval and reproject but different source + o1 = reproject.eval(self.source_coords.transform("EPSG:3857")) + o2 = self.source_coarse2.eval(self.source_coords.transform("EPSG:3857")) + assert_almost_equal(o1.data, o2.data, decimal=13) + + # same source and reproject but different eval + reproject = Reproject(source=self.source, interpolation="bilinear", coordinates=self.coarse_coords) + o1 = reproject.eval(self.source_coords.transform("EPSG:3857")) + o2 = self.source_coarse.eval(self.source_coords.transform("EPSG:3857")) + assert_almost_equal(o1.data, o2.data, decimal=13) diff --git a/podpac/core/algorithm/utility.py b/podpac/core/algorithm/utility.py index 6972c1c2e..a72eceb8b 100644 --- a/podpac/core/algorithm/utility.py +++ b/podpac/core/algorithm/utility.py @@ -44,7 +44,7 @@ class CoordData(Algorithm): Name of coordinate to extract (one of lat, lon, time, alt) """ - coord_name = tl.Unicode("").tag(attr=True) + coord_name = tl.Enum(["time", "lat", "lon", "alt"], default_value="none", allow_none=False).tag(attr=True) def algorithm(self, inputs, coordinates): """Extract coordinate from request and makes data available. diff --git a/podpac/core/authentication.py b/podpac/core/authentication.py index 2a4df8417..a80a30550 100644 --- a/podpac/core/authentication.py +++ b/podpac/core/authentication.py @@ -1,5 +1,5 @@ """ -PODPAC Authentication +PODPAC Authentication """ @@ -189,7 +189,7 @@ def _create_session(self): class S3Mixin(tl.HasTraits): """ Mixin to add S3 credentials and access to a Node. """ - anon = tl.Bool(False) + anon = tl.Bool(False).tag(attr=True) aws_access_key_id = tl.Unicode(allow_none=True) aws_secret_access_key = tl.Unicode(allow_none=True) aws_region_name = tl.Unicode(allow_none=True) diff --git a/podpac/core/compositor/compositor.py b/podpac/core/compositor/compositor.py index c42798c81..2d6ca3d15 100644 --- a/podpac/core/compositor/compositor.py +++ b/podpac/core/compositor/compositor.py @@ -17,7 +17,6 @@ from podpac.core.utils import common_doc, NodeTrait from podpac.core.node import COMMON_NODE_DOC, Node from podpac.core.data.datasource import COMMON_DATA_DOC -from podpac.core.interpolation import InterpolationTrait from podpac.core.managers.multi_threading import thread_manager COMMON_COMPOSITOR_DOC = COMMON_DATA_DOC.copy() # superset of COMMON_NODE_DOC @@ -33,8 +32,8 @@ class BaseCompositor(Node): Source nodes. source_coordinates : :class:`podpac.Coordinates` Coordinates that make each source unique. Must the same size as ``sources`` and single-dimensional. Optional. - interpolation : str, dict, optional - {interpolation} + multithreading : bool, optional + Default is False. If True, will always evaluate the compositor in serial, ignoring any MULTITHREADING settings Notes ----- @@ -51,6 +50,11 @@ class BaseCompositor(Node): sources = tl.List(trait=NodeTrait()).tag(attr=True) source_coordinates = tl.Instance(Coordinates, allow_none=True, default_value=None).tag(attr=True) + multithreading = tl.Bool(False) + + @tl.default("multithreading") + def _default_multithreading(self): + return settings["MULTITHREADING"] dims = tl.List(trait=Dimension()).tag(attr=True) auto_outputs = tl.Bool(False) @@ -195,14 +199,14 @@ def iteroutputs(self, coordinates, _selector=None): yield self.create_output_array(coordinates) return - if settings["MULTITHREADING"]: + if self.multithreading: n_threads = thread_manager.request_n_threads(len(sources)) if n_threads == 1: thread_manager.release_n_threads(n_threads) else: n_threads = 0 - if settings["MULTITHREADING"] and n_threads > 1: + if self.multithreading and n_threads > 1: # evaluate nodes in parallel using thread pool self._multi_threaded = True pool = thread_manager.get_thread_pool(processes=n_threads) diff --git a/podpac/core/compositor/ordered_compositor.py b/podpac/core/compositor/ordered_compositor.py index 14346ef9a..1be0312f2 100644 --- a/podpac/core/compositor/ordered_compositor.py +++ b/podpac/core/compositor/ordered_compositor.py @@ -1,6 +1,7 @@ from __future__ import division, unicode_literals, print_function, absolute_import import numpy as np +import traitlets as tl from podpac.core.node import NodeException from podpac.core.units import UnitsDataArray @@ -20,10 +21,13 @@ class OrderedCompositor(BaseCompositor): Source nodes, in order of preference. Later sources are only used where earlier sources do not provide data. source_coordinates : :class:`podpac.Coordinates` Coordinates that make each source unique. Must the same size as ``sources`` and single-dimensional. Optional. - interpolation : str, dict, optional - {interpolation} + multithreading : bool, optional + Default is True. If True, will always evaluate the compositor in serial, ignoring any MULTITHREADING settings + """ + multithreading = tl.Bool(False) + @common_doc(COMMON_COMPOSITOR_DOC) def composite(self, coordinates, data_arrays, result=None): """Composites data_arrays in order that they appear. Once a request contains no nans, the result is returned. diff --git a/podpac/core/compositor/test/test_ordered_compositor.py b/podpac/core/compositor/test/test_ordered_compositor.py index 425783d32..02ca35a19 100644 --- a/podpac/core/compositor/test/test_ordered_compositor.py +++ b/podpac/core/compositor/test/test_ordered_compositor.py @@ -100,10 +100,14 @@ def test_composite_short_circuit_multithreaded(self): n_threads_before = podpac.core.managers.multi_threading.thread_manager._n_threads_used a = Array(source=np.ones(coords.shape), coordinates=coords, interpolation="bilinear") b = Array(source=np.zeros(coords.shape), coordinates=coords, interpolation="bilinear") - node = OrderedCompositor(sources=[a, b]) + node = OrderedCompositor(sources=[a, b], multithreading=True) + node2 = OrderedCompositor(sources=[a, b], multithreading=False) output = node.eval(coords) + output2 = node2.eval(coords) np.testing.assert_array_equal(output, a.source) + np.testing.assert_array_equal(output2, a.source) assert node._multi_threaded == True + assert node2._multi_threaded == False assert podpac.core.managers.multi_threading.thread_manager._n_threads_used == n_threads_before def test_composite_into_result(self): diff --git a/podpac/core/coordinates/array_coordinates1d.py b/podpac/core/coordinates/array_coordinates1d.py index 45ddff368..28c5f254c 100644 --- a/podpac/core/coordinates/array_coordinates1d.py +++ b/podpac/core/coordinates/array_coordinates1d.py @@ -77,9 +77,7 @@ def __init__(self, coordinates, name=None, **kwargs): self._is_uniform = None else: - deltas = (self.coordinates[1:] - self.coordinates[:-1]).astype(float) * np.sign( - self.coordinates[1] - self.coordinates[0] - ).astype(float) + deltas = self.deltas if np.any(deltas <= 0): self._is_monotonic = False self._is_descending = False @@ -262,6 +260,7 @@ def reshape(self, newshape): # ------------------------------------------------------------------------------------------------------------------ def __getitem__(self, index): + # The following 3 lines are copied by UniformCoordinates1d.__getitem__ if self.ndim == 1 and np.ndim(index) > 1 and np.array(index).dtype == int: index = np.array(index).flatten().tolist() return ArrayCoordinates1d(self.coordinates[index], **self.properties) @@ -270,6 +269,12 @@ def __getitem__(self, index): # Properties # ------------------------------------------------------------------------------------------------------------------ + @property + def deltas(self): + return (self.coordinates[1:] - self.coordinates[:-1]).astype(float) * np.sign( + self.coordinates[1] - self.coordinates[0] + ).astype(float) + @property def ndim(self): return self.coordinates.ndim diff --git a/podpac/core/coordinates/coordinates.py b/podpac/core/coordinates/coordinates.py index 9cd388d5a..1aafa1d9f 100644 --- a/podpac/core/coordinates/coordinates.py +++ b/podpac/core/coordinates/coordinates.py @@ -23,7 +23,7 @@ import podpac from podpac.core.settings import settings -from podpac.core.utils import OrderedDictTrait, _get_query_params_from_url, _get_param +from podpac.core.utils import OrderedDictTrait, _get_query_params_from_url, _get_param, cached_property from podpac.core.coordinates.utils import has_alt_units from podpac.core.coordinates.base_coordinates import BaseCoordinates from podpac.core.coordinates.coordinates1d import Coordinates1d @@ -306,7 +306,7 @@ def points(cls, crs=None, dims=None, **kwargs): return cls([stacked], crs=crs) @classmethod - def from_xarray(cls, x, crs=None): + def from_xarray(cls, x, crs=None, validate_crs=False): """ Create podpac Coordinates from xarray coords. @@ -317,6 +317,8 @@ def from_xarray(cls, x, crs=None): crs : str, optional Coordinate reference system. Supports any PROJ4 or PROJ6 compliant string (https://proj.org/). If not provided, the crs will be loaded from ``x.attrs`` if possible. + validate_crs: bool, optional + Default is False. If True, the crs will be validated. Returns ------- @@ -360,7 +362,7 @@ def from_xarray(cls, x, crs=None): # unstacked d[dim] = ArrayCoordinates1d.from_xarray(xcoords[dim]) - coords = cls(list(d.values()), crs=crs) + coords = cls(list(d.values()), crs=crs, validate_crs=validate_crs) return coords @classmethod @@ -441,7 +443,9 @@ def from_url(cls, url): if coords["crs"] is None: coords["crs"] = _get_param(params, "CRS") - if _get_param(params, "VERSION").startswith("1.1"): + if _get_param(params, "SERVICE") == "WCS": + r = -1 + elif _get_param(params, "VERSION").startswith("1.1"): r = -1 elif _get_param(params, "VERSION").startswith("1.3"): r = 1 @@ -460,7 +464,7 @@ def from_url(cls, url): size = np.array([_get_param(params, "WIDTH"), _get_param(params, "HEIGHT")], int)[::r] coords["coords"] = [ - {"name": "lat", "start": start[0], "stop": stop[0], "size": size[0]}, + {"name": "lat", "start": stop[0], "stop": start[0], "size": size[0]}, {"name": "lon", "start": start[1], "stop": stop[1], "size": size[1]}, ] @@ -470,7 +474,7 @@ def from_url(cls, url): return cls.from_definition(coords) @classmethod - def from_geotransform(cls, geotransform, shape, crs=None): + def from_geotransform(cls, geotransform, shape, crs=None, validate_crs=True): """Creates Coordinates from GDAL Geotransform.""" tol = 1e-15 # tolerance for deciding when a number is zero # Handle the case of rotated coordinates @@ -502,6 +506,7 @@ def from_geotransform(cls, geotransform, shape, crs=None): podpac.clinspace(origin[1], end[1], shape[::order][1], "lon"), ][::order], crs=crs, + validate_crs=validate_crs, ) return coords @@ -854,7 +859,7 @@ def json(self): return json.dumps(self.definition, separators=(",", ":"), cls=podpac.core.utils.JSONEncoder) - @property + @cached_property def hash(self): """:str: Coordinates hash value.""" # We can't use self.json for the hash because the CRS is not standardized. @@ -969,7 +974,6 @@ def drop(self, dims, ignore_missing=False): [c for c in self._coords.values() if c.name not in dims], validate_crs=False, **self.properties ) - # do we ever need this? def udrop(self, dims, ignore_missing=False): """ Remove the given individual dimensions from the Coordinates `udims`. @@ -1425,13 +1429,20 @@ def _simplified_transform(self, transformer, cs): lat_sample = np.linspace(self["lat"].bounds[0], self["lat"].bounds[1], 5) lon_sample = np.linspace(self["lon"].bounds[0], self["lon"].bounds[1], 5) sample = StackedCoordinates(np.meshgrid(lat_sample, lon_sample, indexing="ij"), dims=["lat", "lon"]) + # The sample tests if the crs transform is linear, or non-linear. The results are as follows: + # + # Start from "uniform stacked" + # 1. Returns "uniform unstacked" <-- simple scaling between crs's + # 2. Returns "array unstacked" <-- Orthogonal coordinates still, but non-linear in this dim + # 3. Returns "Stacked" <-- not orthogonal from one crs to the other + # t = sample._transform(transformer) if isinstance(t, StackedCoordinates): # Need to transform ALL the coordinates return # Then we can do a faster transform, either already done or just the diagonal for i, j in zip([0, 1], [1, 0]): - if isinstance(t[i], UniformCoordinates1d): # already done + if isinstance(t[i], UniformCoordinates1d) and isinstance(cs[i], UniformCoordinates1d): # already done start = t[i].start stop = t[i].stop if self[t[i].name].is_descending: diff --git a/podpac/core/coordinates/test/test_coordinates.py b/podpac/core/coordinates/test/test_coordinates.py index 85724bf71..964a42606 100644 --- a/podpac/core/coordinates/test/test_coordinates.py +++ b/podpac/core/coordinates/test/test_coordinates.py @@ -2,9 +2,10 @@ import json from copy import deepcopy + import pytest import numpy as np -from numpy.testing import assert_equal +from numpy.testing import assert_equal, assert_array_equal import xarray as xr import pyproj @@ -371,6 +372,122 @@ def test_grid_points_order(self): else: Coordinates.grid(lat=lat, lon=lon, time=dates) + def test_from_url(self): + crds = Coordinates([[41, 40], [-71, -70], "2018-05-19"], dims=["lat", "lon", "time"]) + crds2 = crds.transform("EPSG:3857") + + url = ( + r"http://testwms/?map=map&&service=WMS&request=GetMap&layers=layer&styles=&format=image%2Fpng" + r"&transparent=true&version={version}&transparency=true&width=256&height=256&srs=EPSG%3A{epsg}" + r"&bbox={},{},{},{}&time={}" + ) + + # version 1.1.1 + c = Coordinates.from_url( + url.format( + min(crds2.bounds["lon"]), + min(crds2.bounds["lat"]), + max(crds2.bounds["lon"]), + max(crds2.bounds["lat"]), + crds2.bounds["time"][0], + version="1.1.1", + epsg="3857", + ) + ) + assert c.bounds == crds2.bounds + + c = Coordinates.from_url( + url.format( + min(crds.bounds["lon"]), + min(crds.bounds["lat"]), + max(crds.bounds["lon"]), + max(crds.bounds["lat"]), + crds.bounds["time"][0], + version="1.1.1", + epsg="4326", + ) + ) + assert c.bounds == crds.bounds + + # version 1.3 + c = Coordinates.from_url( + url.format( + min(crds2.bounds["lon"]), + min(crds2.bounds["lat"]), + max(crds2.bounds["lon"]), + max(crds2.bounds["lat"]), + crds2.bounds["time"][0], + version="1.3", + epsg="3857", + ) + ) + assert c.bounds == crds2.bounds + + c = Coordinates.from_url( + url.format( + min(crds.bounds["lat"]), + min(crds.bounds["lon"]), + max(crds.bounds["lat"]), + max(crds.bounds["lon"]), + crds.bounds["time"][0], + version="1.3", + epsg="4326", + ) + ) + + assert c.bounds == crds.bounds + + # WCS version + crds = Coordinates([[41, 40], [-71, -70], "2018-05-19"], dims=["lat", "lon", "time"]) + crds2 = crds.transform("EPSG:3857") + + url = ( + r"http://testwms/?map=map&&service=WCS&request=GetMap&layers=layer&styles=&format=image%2Fpng" + r"&transparent=true&version={version}&transparency=true&width=256&height=256&srs=EPSG%3A{epsg}" + r"&bbox={},{},{},{}&time={}" + ) + + c = Coordinates.from_url( + url.format( + min(crds2.bounds["lon"]), + min(crds2.bounds["lat"]), + max(crds2.bounds["lon"]), + max(crds2.bounds["lat"]), + crds2.bounds["time"][0], + version="1.1", + epsg="3857", + ) + ) + assert c.bounds == crds2.bounds + + # Based on all the documentation I've read, this should be correct, but + # based on the server's I've checked, this does not seem correct + # c = Coordinates.from_url( + # url.format( + # min(crds.bounds["lat"]), + # min(crds.bounds["lon"]), + # max(crds.bounds["lat"]), + # max(crds.bounds["lon"]), + # crds.bounds["time"][0], + # version="1.1", + # epsg="4326", + # ) + # ) + + c = Coordinates.from_url( + url.format( + min(crds.bounds["lon"]), + min(crds.bounds["lat"]), + max(crds.bounds["lon"]), + max(crds.bounds["lat"]), + crds.bounds["time"][0], + version="1.1", + epsg="4326", + ) + ) + + assert c.bounds == crds.bounds + def test_from_xarray(self): lat = [0, 1, 2] lon = [10, 20, 30] @@ -582,71 +699,6 @@ def test_json(self): c2 = Coordinates.from_json(s) assert c2 == c - def test_from_url(self): - crds = Coordinates([[41, 40], [-71, -70], "2018-05-19"], dims=["lat", "lon", "time"]) - crds2 = crds.transform("EPSG:3857") - - url = ( - r"http://testwms/?map=map&&service=WMS&request=GetMap&layers=layer&styles=&format=image%2Fpng" - r"&transparent=true&version={version}&transparency=true&width=256&height=256&srs=EPSG%3A{epsg}" - r"&bbox={},{},{},{}&time={}" - ) - - # version 1.1.1 - c = Coordinates.from_url( - url.format( - min(crds2.bounds["lon"]), - min(crds2.bounds["lat"]), - max(crds2.bounds["lon"]), - max(crds2.bounds["lat"]), - crds2.bounds["time"][0], - version="1.1.1", - epsg="3857", - ) - ) - assert c.bounds == crds2.bounds - - c = Coordinates.from_url( - url.format( - min(crds.bounds["lon"]), - min(crds.bounds["lat"]), - max(crds.bounds["lon"]), - max(crds.bounds["lat"]), - crds.bounds["time"][0], - version="1.1.1", - epsg="4326", - ) - ) - assert c.bounds == crds.bounds - - # version 1.3 - c = Coordinates.from_url( - url.format( - min(crds2.bounds["lon"]), - min(crds2.bounds["lat"]), - max(crds2.bounds["lon"]), - max(crds2.bounds["lat"]), - crds2.bounds["time"][0], - version="1.3", - epsg="3857", - ) - ) - assert c.bounds == crds2.bounds - - c = Coordinates.from_url( - url.format( - min(crds.bounds["lat"]), - min(crds.bounds["lon"]), - max(crds.bounds["lat"]), - max(crds.bounds["lon"]), - crds.bounds["time"][0], - version="1.3", - epsg="4326", - ) - ) - - assert c.bounds == crds.bounds - class TestCoordinatesProperties(object): def test_xarray_coords(self): @@ -2033,3 +2085,12 @@ def test_transform_missing_lat_lon(self): with pytest.raises(ValueError, match="nonadjacent lat and lon"): grid_coords = Coordinates([np.linspace(-10, 10, 21), [1], [1, 2, 3]], dims=["lat", "time", "lon"]) grid_coords.transform(crs="EPSG:2193") + + def test_transform_same_crs_same_result(self): + c1 = Coordinates( + [[1, 2, 4], clinspace(0, 4, 4)], dims=["lat", "lon"], crs="+proj=longlat +datum=WGS84 +no_defs +vunits=m" + ) + c2 = c1.transform("EPSG:4326") + + assert_array_equal(c2["lat"].coordinates, c1["lat"].coordinates) + assert_array_equal(c2["lon"].coordinates, c1["lon"].coordinates) diff --git a/podpac/core/coordinates/uniform_coordinates1d.py b/podpac/core/coordinates/uniform_coordinates1d.py index 692269840..3898c2487 100644 --- a/podpac/core/coordinates/uniform_coordinates1d.py +++ b/podpac/core/coordinates/uniform_coordinates1d.py @@ -17,6 +17,7 @@ ) from podpac.core.coordinates.coordinates1d import Coordinates1d from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d +from podpac.core.utils import cached_property class UniformCoordinates1d(Coordinates1d): @@ -215,7 +216,10 @@ def from_definition(cls, d): def __getitem__(self, index): # fallback for non-slices if not isinstance(index, slice): - return ArrayCoordinates1d(self.coordinates, **self.properties)[index] + # The following 3 lines is copied from ArrayCoordinates1d.__getitem__ + if self.ndim == 1 and np.ndim(index) > 1 and np.array(index).dtype == int: + index = np.array(index).flatten().tolist() + return ArrayCoordinates1d(self.coordinates[index], **self.properties) # start, stop, step if index.start is None: @@ -269,7 +273,7 @@ def __contains__(self, item): # Properties # ------------------------------------------------------------------------------------------------------------------ - @property + @cached_property def coordinates(self): """:array, read-only: Coordinate values. """ diff --git a/podpac/core/data/array_source.py b/podpac/core/data/array_source.py index d6817d863..f27736455 100644 --- a/podpac/core/data/array_source.py +++ b/podpac/core/data/array_source.py @@ -57,7 +57,7 @@ class ArrayRaw(NoCacheMixin, DataSource): source = ArrayTrait().tag(attr=True) coordinates = tl.Instance(Coordinates).tag(attr=True) - _repr_keys = ["shape", "interpolation"] + _repr_keys = ["shape"] @tl.validate("source") def _validate_source(self, d): diff --git a/podpac/core/data/dataset_source.py b/podpac/core/data/dataset_source.py index 0c2fbd6cd..d593c4894 100644 --- a/podpac/core/data/dataset_source.py +++ b/podpac/core/data/dataset_source.py @@ -49,6 +49,7 @@ class DatasetRaw(FileKeysMixin, LoadFileMixin, BaseFileSource): # dataset = tl.Instance(xr.Dataset).tag(readonly=True) selection = tl.Dict(allow_none=True, default_value=None).tag(attr=True) infer_podpac_coords = tl.Bool(False).tag(attr=True) + decode_cf = tl.Bool(True) coordinate_index_type = "xarray" # ------------------------------------------------------------------------- @@ -56,7 +57,7 @@ class DatasetRaw(FileKeysMixin, LoadFileMixin, BaseFileSource): # ------------------------------------------------------------------------- def open_dataset(self, fp): - return xr.open_dataset(fp) + return xr.open_dataset(fp, decode_cf=self.decode_cf) def close_dataset(self): super(DatasetRaw, self).close_dataset() diff --git a/podpac/core/data/datasource.py b/podpac/core/data/datasource.py index 743bf376f..4a5c20da0 100644 --- a/podpac/core/data/datasource.py +++ b/podpac/core/data/datasource.py @@ -20,7 +20,7 @@ from podpac.core.units import UnitsDataArray from podpac.core.coordinates import Coordinates, Coordinates1d, StackedCoordinates from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES, make_coord_delta, make_coord_delta_array -from podpac.core.node import Node, NodeException +from podpac.core.node import Node from podpac.core.utils import common_doc from podpac.core.node import COMMON_NODE_DOC @@ -88,7 +88,7 @@ """, "interpolation": """ Interpolation definition for the data source. - By default, the interpolation method is set to ``'nearest'`` for all dimensions. + By default, the interpolation method is set to `podpac.settings["DEFAULT_INTERPOLATION"]` which defaults to 'nearest'` for all dimensions. """, "interpolation_long": """ {interpolation} @@ -137,7 +137,7 @@ class DataSource(Node): nan_vals : List, optional List of values from source data that should be interpreted as 'no data' or 'nans' coordinate_index_type : str, optional - Type of index to use for data source. Possible values are ``['slice', 'numpy']`` + Type of index to use for data source. Possible values are ``['slice', 'numpy', 'xarray']`` Default is 'numpy', which allows a tuple of integer indices. cache_coordinates : bool Whether to cache coordinates using the podpac ``cache_ctrl``. Default False. @@ -372,7 +372,11 @@ def _eval(self, coordinates, output=None, _selector=None): data = rsd if output is None: if requested_coordinates.crs.lower() != coordinates.crs.lower(): - data = self.create_output_array(rsc, data=data.data) + if rsc.shape == data.shape: + data = self.create_output_array(rsc, data=data.data) + else: + crds = Coordinates.from_xarray(data, crs=data.attrs.get("crs", None)) + data = self.create_output_array(crds.transform(rsc.crs), data=data.data) output = data else: output.data[:] = data.data diff --git a/podpac/core/data/ogc.py b/podpac/core/data/ogc.py index 10357f599..7f0ddff73 100644 --- a/podpac/core/data/ogc.py +++ b/podpac/core/data/ogc.py @@ -9,8 +9,9 @@ from functools import reduce import traitlets as tl +import pyproj -from podpac.core.utils import common_doc, cached_property +from podpac.core.utils import common_doc, cached_property, resolve_bbox_order from podpac.core.data.datasource import DataSource from podpac.core.interpolation.interpolation import InterpolationMixin from podpac.core.node import NodeException @@ -23,12 +24,100 @@ bs4 = lazy_module("bs4") lxml = lazy_module("lxml") # used by bs4 so want to check if it's available owslib_wcs = lazy_module("owslib.wcs") +owslib_util = lazy_module("owslib.util") rasterio = lazy_module("rasterio") logger = logging.getLogger(__name__) +class MockWCSClient(tl.HasTraits): + source = tl.Unicode() + version = tl.Enum(["1.0.0"], default_value="1.0.0") + headers = None + cookies = None + auth = tl.Any() + + def getCoverage( + self, + identifier=None, + bbox=None, + time=None, + format=None, + crs=None, + width=None, + height=None, + resx=None, + resy=None, + resz=None, + parameter=None, + method="Get", + timeout=30, + **kwargs + ): + """Request and return a coverage from the WCS as a file-like object + note: additional **kwargs helps with multi-version implementation + core keyword arguments should be supported cross version + example: + cvg=wcs.getCoverage(identifier=['TuMYrRQ4'], timeSequence=['2792-06-01T00:00:00.0'], bbox=(-112,36,-106,41), + format='cf-netcdf') + is equivalent to: + http://myhost/mywcs?SERVICE=WCS&REQUEST=GetCoverage&IDENTIFIER=TuMYrRQ4&VERSION=1.1.0&BOUNDINGBOX=-180,-90,180,90&TIME=2792-06-01T00:00:00.0&FORMAT=cf-netcdf + """ + from owslib.util import makeString + from urllib.parse import urlencode + from owslib.util import openURL + + if logger.isEnabledFor(logging.DEBUG): + msg = "WCS 1.0.0 DEBUG: Parameters passed to GetCoverage: identifier={}, bbox={}, time={}, format={}, crs={}, width={}, height={}, resx={}, resy={}, resz={}, parameter={}, method={}, other_arguments={}" # noqa + logger.debug( + msg.format( + identifier, bbox, time, format, crs, width, height, resx, resy, resz, parameter, method, str(kwargs) + ) + ) + + base_url = self.source + + logger.debug("WCS 1.0.0 DEBUG: base url of server: %s" % base_url) + + # process kwargs + request = {"version": self.version, "request": "GetCoverage", "service": "WCS"} + assert len(identifier) > 0 + request["Coverage"] = identifier + # request['identifier'] = ','.join(identifier) + if bbox: + request["BBox"] = ",".join([makeString(x) for x in bbox]) + else: + request["BBox"] = None + if time: + request["time"] = ",".join(time) + if crs: + request["crs"] = crs + request["format"] = format + if width: + request["width"] = width + if height: + request["height"] = height + if resx: + request["resx"] = resx + if resy: + request["resy"] = resy + if resz: + request["resz"] = resz + + # anything else e.g. vendor specific parameters must go through kwargs + if kwargs: + for kw in kwargs: + request[kw] = kwargs[kw] + + # encode and request + data = urlencode(request) + logger.debug("WCS 1.0.0 DEBUG: Second part of URL: %s" % data) + + u = openURL(base_url, data, method, self.cookies, auth=self.auth, timeout=timeout, headers=self.headers) + return u + + class WCSError(NodeException): pass @@ -54,6 +143,13 @@ class WCSRaw(DataSource): max_size : int maximum request size, optional. If provided, the coordinates will be tiled into multiple requests. + allow_mock_client : bool + Default is False. If True, a mock client will be used to make WCS requests. This allows returns + from servers with only partial WCS implementations. + username : str + Username for servers that require authentication + password : str + Password for servers that require authentication See Also -------- @@ -64,6 +160,9 @@ class WCSRaw(DataSource): layer = tl.Unicode().tag(attr=True) version = tl.Unicode(default_value="1.0.0").tag(attr=True) interpolation = tl.Unicode(default_value=None, allow_none=True).tag(attr=True) + allow_mock_client = tl.Bool(False).tag(attr=True) + username = tl.Unicode(allow_none=True) + password = tl.Unicode(allow_none=True) format = tl.CaselessStrEnum(["geotiff", "geotiff_byte"], default_value="geotiff") crs = tl.Unicode(default_value="EPSG:4326") @@ -75,9 +174,25 @@ class WCSRaw(DataSource): _evaluated_coordinates = tl.Instance(Coordinates) coordinate_index_type = "slice" + @property + def auth(self): + if self.username and self.password: + return owslib_util.Authentication(username=self.username, password=self.password) + return None + @cached_property def client(self): - return owslib_wcs.WebCoverageService(self.source, version=self.version) + try: + return owslib_wcs.WebCoverageService(self.source, version=self.version, auth=self.auth) + except Exception as e: + if self.allow_mock_client: + logger.warning( + "The OWSLIB Client could not be used. Server endpoint likely does not implement GetCapabilities" + "requests. Using Mock client instead. Error was {}".format(e) + ) + return MockWCSClient(source=self.source, version=self.version, auth=self.auth) + else: + raise e def get_coordinates(self): """ @@ -86,18 +201,32 @@ def get_coordinates(self): metadata = self.client.contents[self.layer] - # TODO select correct boundingbox by crs - # coordinates - w, s, e, n = metadata.boundingBoxWGS84 + bbox = metadata.boundingBoxWGS84 + crs = "EPSG:4326" + logging.debug("WCS available boundingboxes: {}".format(metadata.boundingboxes)) + for bboxes in metadata.boundingboxes: + if bboxes["nativeSrs"] == self.crs: + bbox = bboxes["bbox"] + crs = self.crs + break + low = metadata.grid.lowlimits high = metadata.grid.highlimits xsize = int(high[0]) - int(low[0]) ysize = int(high[1]) - int(low[1]) + # Based on https://www.ctps.org/geoserver/web/wicket/bookmarkable/org.geoserver.wcs.web.demo.WCSRequestBuilder;jsessionid=9E2AA99F95410C694D05BA609F25527C?0 + # The above link points to a geoserver implementation, which is the reference implementation. + # WCS version 1.0.0 always has order lon/lat while version 1.1.1 actually follows the CRS + if self.version == "1.0.0": + rbbox = {"lat": [bbox[1], bbox[3], ysize], "lon": [bbox[0], bbox[2], xsize]} + else: + rbbox = resolve_bbox_order(bbox, crs, (xsize, ysize)) + coords = [] - coords.append(UniformCoordinates1d(s, n, size=ysize, name="lat")) - coords.append(UniformCoordinates1d(w, e, size=xsize, name="lon")) + coords.append(UniformCoordinates1d(rbbox["lat"][0], rbbox["lat"][1], size=rbbox["lat"][2], name="lat")) + coords.append(UniformCoordinates1d(rbbox["lon"][0], rbbox["lon"][1], size=rbbox["lon"][2], name="lon")) if metadata.timepositions: coords.append(ArrayCoordinates1d(metadata.timepositions, name="time")) @@ -105,7 +234,7 @@ def get_coordinates(self): if metadata.timelimits: raise NotImplementedError("TODO") - return Coordinates(coords, crs=self.crs) + return Coordinates(coords, crs=crs) def _eval(self, coordinates, output=None, _selector=None): """Evaluates this node using the supplied coordinates. @@ -134,6 +263,15 @@ def _eval(self, coordinates, output=None, _selector=None): ValueError Cannot evaluate these coordinates """ + # The mock client cannot figure out the real coordinates, so just duplicate the requested coordinates + if isinstance(self.client, MockWCSClient): + if not coordinates["lat"].is_uniform or not coordinates["lon"].is_uniform: + raise NotImplementedError( + "When using the Mock WCS client, the requested coordinates need to be uniform." + ) + self.set_trait("_coordinates", coordinates) + self.set_trait("crs", coordinates.crs) + # remove extra dimensions extra = [ c.name @@ -247,9 +385,16 @@ def _get_chunk(self, coordinates): % (self.source, self.layer, (w, n, e, s), (width, height)) ) + crs = pyproj.CRS(coordinates.crs) + bbox = (min(w, e), min(s, n), max(e, w), max(n, s)) + # Based on the spec I need the following line, but + # all my tests on other servers suggests I don't need this... + # if crs.axis_info[0].direction == "north": + # bbox = (min(s, n), min(w, e), max(n, s), max(e, w)) + response = self.client.getCoverage( identifier=self.layer, - bbox=(w, n, e, s), + bbox=bbox, width=width, height=height, crs=self.crs, @@ -276,6 +421,13 @@ def _get_chunk(self, coordinates): raise NotImplementedError("TODO") data = dataset.read(1).astype(float) + + # Need to fix the data order. The request and response order is always the same in WCS, but not in PODPAC + if n > s: # By default it returns the data upside down, so this is backwards + data = data[::-1] + if e < w: + data = data[:, ::-1] + return data @classmethod diff --git a/podpac/core/data/ogr.py b/podpac/core/data/ogr.py new file mode 100644 index 000000000..d41a8e5c5 --- /dev/null +++ b/podpac/core/data/ogr.py @@ -0,0 +1,120 @@ +import os.path + +import numpy as np +import traitlets as tl + +from lazy_import import lazy_module + +gdal = lazy_module("osgeo.gdal") +ogr = lazy_module("osgeo.ogr") + +from podpac import Node, Coordinates, cached_property, settings, clinspace +from podpac.core.utils import common_doc +from podpac.core.node import COMMON_NODE_DOC +from podpac.core.interpolation.interpolation import InterpolationMixin + + +class OGRRaw(Node): + """ """ + + source = tl.Unicode().tag(attr=True) + layer = tl.Unicode().tag(attr=True) + attribute = tl.Unicode().tag(attr=True) + nan_vals = tl.List().tag(attr=True) + nan_val = tl.Any(np.nan).tag(attr=True) + driver = tl.Unicode() + + _repr_keys = ["source", "layer", "attribute"] + + # debug traits + _requested_coordinates = tl.Instance(Coordinates, allow_none=True) + _evaluated_coordinates = tl.Instance(Coordinates, allow_none=True) + + @tl.validate("driver") + def _validate_driver(self, d): + ogr.GetDriverByName(d["value"]) + return d["value"] + + @tl.validate("source") + def _validate_source(self, d): + if not os.path.exists(d["value"]): + raise ValueError("OGR source not found '%s'" % d["value"]) + return d["value"] + + @cached_property + def datasource(self): + driver = ogr.GetDriverByName(self.driver) + return driver.Open(self.source, 0) + + @cached_property + def extents(self): + layer = self.datasource.GetLayerByName(self.layer) + return layer.GetExtent() + + @common_doc(COMMON_NODE_DOC) + def _eval(self, coordinates, output=None, _selector=None): + if "lat" not in coordinates.udims or "lon" not in coordinates.udims: + raise RuntimeError("OGR source requires lat and lon dims") + + requested_coordinates = coordinates + coordinates = coordinates.udrop(["time", "alt"], ignore_missing=True) + + if coordinates.size == 1 or "lat_lon" in coordinates or "lon_lat" in coordinates: + # point or points + eps = 1e-6 + data = np.empty(coordinates.size) + for i, (lat, lon) in enumerate(zip(coordinates["lat"].coordinates, coordinates["lon"].coordinates)): + geotransform = [lon - eps / 2.0, eps, 0.0, lat - eps / 2.0, 0.0, -1.0 * eps] + data[i] = self._get_data(1, 1, geotransform) + data = data.reshape(coordinates.shape) + + else: + # resample non-uniform coordinates if necessary + if not coordinates["lat"].is_uniform: + coordinates["lat"] = clinspace( + coordinates["lat"].bounds[0], coordinates["lat"].bounds[1], coordinates["lat"].size, name="lat" + ) + if not coordinates["lon"].is_uniform: + coordinates["lon"] = clinspace( + coordinates["lon"].bounds[0], coordinates["lon"].bounds[1], coordinates["lon"].size, name="lon" + ) + + # evaluate uniform grid + data = self._get_data(coordinates["lon"].size, coordinates["lat"].size, coordinates.geotransform) + + if output is None: + output = self.create_output_array(coordinates, data=data) + else: + output.data[:] = data + + # nan values + output.data[np.isin(output.data, self.nan_vals)] = self.nan_val + + if settings["DEBUG"]: + self._requested_coordinates = requested_coordinates + self._evaluated_coordinates = coordinates + + return output + + def _get_data(self, xsize, ysize, geotransform): + nan_val = 0 + + # create target datasource + driver = gdal.GetDriverByName("MEM") + target = driver.Create("", xsize, ysize, gdal.GDT_Float64) + target.SetGeoTransform(geotransform) + band = target.GetRasterBand(1) + band.SetNoDataValue(nan_val) + band.Fill(nan_val) + + # rasterize + layer = self.datasource.GetLayerByName(self.layer) + gdal.RasterizeLayer(target, [1], layer, options=["ATTRIBUTE=%s" % self.attribute]) + + data = band.ReadAsArray(buf_type=gdal.GDT_Float64).copy() + data[data == nan_val] = np.nan + return data + + +class OGR(InterpolationMixin, OGRRaw): + interpolation = "nearest" diff --git a/podpac/core/data/rasterio_source.py b/podpac/core/data/rasterio_source.py index dabab8695..478db71a2 100644 --- a/podpac/core/data/rasterio_source.py +++ b/podpac/core/data/rasterio_source.py @@ -2,6 +2,7 @@ from collections import OrderedDict import io +from podpac.core.coordinates.array_coordinates1d import ArrayCoordinates1d import re from six import string_types @@ -16,7 +17,7 @@ boto3 = lazy_module("boto3") from podpac.core.utils import common_doc, cached_property -from podpac.core.coordinates import UniformCoordinates1d, Coordinates +from podpac.core.coordinates import UniformCoordinates1d, Coordinates, merge_dims from podpac.core.data.datasource import COMMON_DATA_DOC, DATA_DOC from podpac.core.data.file_source import BaseFileSource from podpac.core.authentication import S3Mixin @@ -44,6 +45,11 @@ class RasterioRaw(S3Mixin, BaseFileSource): specify the crs in case this information is missing from the file. aws_https: bool Default is True. If False, will not use https when reading from AWS. This is useful for debugging when SSL certificates are invalid. + prefer_overviews: bool, optional + Default is False. If True, will pull data from an overview with the closest resolution (step size) matching the smallest resolution + in the request. + prefer_overviews_closest: bool, optional + Default is False. If True, will find the closest overview instead of the closest See Also -------- @@ -54,14 +60,28 @@ class RasterioRaw(S3Mixin, BaseFileSource): crs = tl.Unicode(allow_none=True, default_value=None).tag(attr=True) driver = tl.Unicode(allow_none=True, default_value=None) - coordinate_index_type = "slice" - aws_https = tl.Bool(True) + coordinate_index_type = tl.Unicode() + aws_https = tl.Bool(True).tag(attr=True) + prefer_overviews = tl.Bool(False).tag(attr=True) + prefer_overviews_closest = tl.Bool(False).tag(attr=True) + + @tl.default("coordinate_index_type") + def _default_coordinate_index_type(self): + if self.prefer_overviews: + return "numpy" + else: + return "slice" @cached_property def dataset(self): - envargs = {} - - if self.source.startswith("s3://"): + return self.open_dataset(self.source) + + def open_dataset(self, source, overview_level=None): + envargs = {"AWS_HTTPS": self.aws_https} + kwargs = {} + if overview_level is not None: + kwargs = {'overview_level': overview_level} + if source.startswith("s3://"): envargs["session"] = rasterio.session.AWSSession( aws_access_key_id=self.aws_access_key_id, aws_secret_access_key=self.aws_secret_access_key, @@ -70,9 +90,11 @@ def dataset(self): aws_unsigned=self.anon, ) - with rasterio.env.Env(**envargs) as env: - _logger.debug("Rasterio environment options: {}".format(env.options)) - return rasterio.open(self.source) + with rasterio.env.Env(**envargs) as env: + _logger.debug("Rasterio environment options: {}".format(env.options)) + return rasterio.open(source, **kwargs) + else: + return rasterio.open(source, **kwargs) @tl.default("band") def _band_default(self): @@ -106,24 +128,31 @@ def get_coordinates(self): # check to see if the coordinates are rotated used affine affine = self.dataset.transform - + validate_crs = True if self.crs is not None: crs = self.crs elif isinstance(self.dataset.crs, rasterio.crs.CRS) and "init" in self.dataset.crs: crs = self.dataset.crs["init"].upper() + if self.dataset.crs.is_valid: + validate_crs = False elif isinstance(self.dataset.crs, dict) and "init" in self.dataset.crs: crs = self.dataset.crs["init"].upper() + if self.dataset.crs.is_valid: + validate_crs = False else: try: crs = pyproj.CRS(self.dataset.crs).to_wkt() except pyproj.exceptions.CRSError: raise RuntimeError("Unexpected rasterio crs '%s'" % self.dataset.crs) - return Coordinates.from_geotransform(affine.to_gdal(), self.dataset.shape, crs) + return Coordinates.from_geotransform(affine.to_gdal(), self.dataset.shape, crs, validate_crs) @common_doc(COMMON_DATA_DOC) def get_data(self, coordinates, coordinates_index): """{get_data}""" + if self.prefer_overviews: + return self.get_data_overviews(coordinates, coordinates_index) + data = self.create_output_array(coordinates) slc = coordinates_index @@ -140,10 +169,87 @@ def get_data(self, coordinates, coordinates_index): data.data.ravel()[:] = raster_data.ravel() return data + def get_data_overviews(self, coordinates, coordinates_index): + # Figure out how much coarser the request is than the actual data + reduction_factor = np.inf + for c in ["lat", "lon"]: + crd = coordinates[c] + if crd.size == 1: + reduction_factor = 0 + break + if isinstance(crd, UniformCoordinates1d): + min_delta = crd.step + elif isinstance(crd, ArrayCoordinates1d) and crd.is_monotonic: + min_delta = crd.deltas.min() + else: + raise NotImplementedError( + "The Rasterio node with prefer_overviews=True currently does not support request coordinates type {}".format( + coordinates + ) + ) + reduction_factor = min( + reduction_factor, np.abs(min_delta / self.coordinates[c].step) # self.coordinates is always uniform + ) + # Find the overview that's closest to this reduction factor + if reduction_factor < 2: # Then we shouldn't use an overview + overview = 1 + overview_level = None + else: + diffs = reduction_factor - np.array(self.overviews) + if self.prefer_overviews_closest: + diffs = np.abs(diffs) + else: + diffs[diffs < 0] = np.inf + overview_level = np.argmin(diffs) + overview = self.overviews[np.argmin(diffs)] + + # Now read the data + inds = coordinates_index + if overview_level is None: + dataset = self.dataset + else: + dataset = self.open_dataset(self.source, overview_level) + try: + # read data within coordinates_index window at the resolution of the overview + # Rasterio will then automatically pull from the overview + window = ( + ((inds[0].min() // overview), int(np.ceil(inds[0].max() / overview) + 1)), + ((inds[1].min() // overview), int(np.ceil(inds[1].max() / overview) + 1)), + ) + slc = (slice(window[0][0], window[0][1], 1), slice(window[1][0], window[1][1], 1)) + new_coords = Coordinates.from_geotransform(dataset.transform.to_gdal(), dataset.shape, crs=self.coordinates.crs) + new_coords = new_coords[slc] + missing_coords = self.coordinates.drop(['lat', 'lon']) + new_coords = merge_dims([new_coords, missing_coords]) + new_coords = new_coords.transpose(*self.coordinates.dims) + coordinates_shape = new_coords.shape[:2] + + # The following lines are *nearly* copied/pasted from get_data + if self.outputs is not None: # read all the bands + raster_data = dataset.read(out_shape=(len(self.outputs),) + coordinates_shape, window=window) + raster_data = np.moveaxis(raster_data, 0, 2) + else: # read the requested band + raster_data = dataset.read(self.band, out_shape=coordinates_shape, window=window) + + # set raster data to output array + data = self.create_output_array(new_coords) + data.data.ravel()[:] = raster_data.ravel() + except Exception as e: + _logger.error("Error occurred when reading overview with Rasterio: {}".format(e)) + + if overview_level is not None: + dataset.close() + + return data + # ------------------------------------------------------------------------- # additional methods and properties # ------------------------------------------------------------------------- + @property + def overviews(self): + return self.dataset.overviews(self.band) + @property def tags(self): return self.dataset.tags() diff --git a/podpac/core/data/test/test_ogr.py b/podpac/core/data/test/test_ogr.py new file mode 100644 index 000000000..0d6a54898 --- /dev/null +++ b/podpac/core/data/test/test_ogr.py @@ -0,0 +1,86 @@ +import pytest +import numpy as np + +import podpac +from podpac.core.data.ogr import OGR, OGRRaw + + +@pytest.mark.skip(reason="No test file available yet") +class TestOGRRaw(object): + source = "TODO" + driver = "ESRI Shapefile" + layer = "TODO" + attribute = "TODO" + + def test_extents(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + node.extents + + def test_eval_uniform(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([podpac.clinspace(43, 44, 10), podpac.clinspace(-73, -72, 10)], dims=["lat", "lon"]) + output = node.eval(coords) + + def test_eval_point(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([43.7, -72.3], dims=["lat", "lon"]) + output = node.eval(coords) + + def test_eval_stacked(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([[[43, 43.5, 43.7], [-72.0, -72.5, -72.7]]], dims=["lat_lon"]) + output = node.eval(coords) + + def test_eval_nonuniform(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([[43, 43.5, 43.7], [-72.0, -72.5, -72.7]], dims=["lat", "lon"]) + output = node.eval(coords) + + # coordinates are resampled to be uniform + np.testing.assert_array_equal(output["lat"], [43, 43.35, 43.7]) + np.testing.assert_array_equal(output["lon"], [-72.7, -72.35, -72]) + + def test_eval_extra_dims(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates( + [podpac.clinspace(43, 44, 10), podpac.clinspace(-73, -72, 10), "2018-01-01"], dims=["lat", "lon", "time"] + ) + output = node.eval(coords) + + def test_eval_missing_dims(self): + node = OGRRaw(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates(["2018-01-01"], dims=["time"]) + with pytest.raises(RuntimeError, match="OGR source requires lat and lon dims"): + output = node.eval(coords) + + +@pytest.mark.skip(reason="No test file available yet") +class TestOGR(object): + source = "TODO" + driver = "ESRI Shapefile" + layer = "TODO" + attribute = "TODO" + + def test_eval_uniform(self): + node = OGR(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([podpac.clinspace(43, 44, 10), podpac.clinspace(-73, -72, 10)], dims=["lat", "lon"]) + output = node.eval(coords) + + def test_eval_point(self): + node = OGR(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([43.7, -72.3], dims=["lat", "lon"]) + output = node.eval(coords) + + def test_eval_stacked(self): + node = OGR(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([[[43, 43.5, 43.7], [-72.0, -72.5, -72.7]]], dims=["lat_lon"]) + output = node.eval(coords) + + def test_eval_nonuniform(self): + node = OGR(source=self.source, driver=self.driver, layer=self.layer, attribute=self.attribute) + coords = podpac.Coordinates([[43, 43.5, 43.7], [-72.0, -72.5, -72.7]], dims=["lat", "lon"]) + output = node.eval(coords) + + # coordinates are interpolated back to the requested coordinates + np.testing.assert_array_equal(output["lat"], coords["lat"].coordinates) + np.testing.assert_array_equal(output["lon"], coords["lon"].coordinates) diff --git a/podpac/core/data/test/test_wcs.py b/podpac/core/data/test/test_wcs.py index fb43a2c12..973e67296 100644 --- a/podpac/core/data/test/test_wcs.py +++ b/podpac/core/data/test/test_wcs.py @@ -1,12 +1,14 @@ import pytest import traitlets as tl from io import BytesIO +import numpy as np import podpac from podpac.core.data.ogc import WCS, WCSRaw COORDS = podpac.Coordinates( - [podpac.clinspace(-132.9023, -53.6051, 100, name="lon"), podpac.clinspace(23.6293, 53.7588, 100, name="lat")] + [podpac.clinspace(-132.9023, -53.6051, 100, name="lon"), podpac.clinspace(23.6293, 53.7588, 100, name="lat")], + # crs="EPSG:4326", ) @@ -93,7 +95,8 @@ def test_eval_uniform_stacked(self): node = MockWCSRaw(source="mock", layer="mock") output = node.eval(c) assert output.shape == (100,) - assert output.data.sum() == 14350.0 + # MPU Note: changed from 14350.0 to 12640.0 based on np.diag(node.eval(COORDS)).sum() + assert output.data.sum() == 12640.0 def test_eval_extra_unstacked_dim(self): c = podpac.Coordinates(["2020-01-01", COORDS["lat"], COORDS["lon"]], dims=["time", "lat", "lon"]) @@ -158,7 +161,8 @@ def test_eval_uniform_stacked(self): node = MockWCS(source="mock", layer="mock") output = node.eval(c) assert output.shape == (100,) - assert output.data.sum() == 14350.0 + # MPU Note: changed from 14350.0 to 12640.0 based on np.diag(node.eval(COORDS)).sum() + assert output.data.sum() == 12640.0 @pytest.mark.integration @@ -167,7 +171,6 @@ class TestWCSIntegration(object): def setup_class(cls): cls.node1 = WCSRaw(source=cls.source, layer="sand_0-5cm_mean", format="geotiff_byte", max_size=16384) - cls.node2 = WCS(source=cls.source, layer="sand_0-5cm_mean", format="geotiff_byte", max_size=16384) def test_coordinates(self): @@ -195,7 +198,7 @@ def test_eval_uniform_stacked(self): def test_eval_chunked(self): node = WCSRaw(source=self.source, layer="sand_0-5cm_mean", format="geotiff_byte", max_size=4000) - node.eval(COORDS) + o1 = node.eval(COORDS) def test_eval_other_crs(self): c = COORDS.transform("EPSG:3395") diff --git a/podpac/core/interpolation/interpolation.py b/podpac/core/interpolation/interpolation.py index 5d7f18539..313813b9e 100644 --- a/podpac/core/interpolation/interpolation.py +++ b/podpac/core/interpolation/interpolation.py @@ -1,4 +1,5 @@ from __future__ import division, unicode_literals, print_function, absolute_import +from podpac.core.cache import cache_ctrl import traitlets as tl from copy import deepcopy @@ -15,6 +16,7 @@ from podpac.core.units import UnitsDataArray from podpac.core.coordinates import merge_dims, Coordinates from podpac.core.interpolation.interpolation_manager import InterpolationManager, InterpolationTrait +from podpac.core.cache.cache_ctrl import CacheCtrl _logger = logging.getLogger(__name__) @@ -28,12 +30,17 @@ def _repr_keys(self): return super()._repr_keys + ["interpolation"] def _eval(self, coordinates, output=None, _selector=None): - node = Interpolate(interpolation=self.interpolation) + node = Interpolate( + interpolation=self.interpolation, source_id=self.hash, force_eval=True, cache_ctrl=CacheCtrl([]) + ) node._set_interpolation() selector = node._interpolation.select_coordinates node._source_xr = super()._eval(coordinates, _selector=selector) self._interp_node = node - return node.eval(coordinates, output=output) + r = node.eval(coordinates, output=output) + # Helpful for debugging + self._from_cache = node._from_cache + return r class Interpolate(Node): @@ -45,7 +52,7 @@ class Interpolate(Node): The source node which will be interpolated interpolation : str, dict, optional Interpolation definition for the data source. - By default, the interpolation method is set to ``'nearest'`` for all dimensions. + By default, the interpolation method is set to `podpac.settings["DEFAULT_INTERPOLATION"]` which defaults to ``'nearest'`` for all dimensions. If input is a string, it must match one of the interpolation shortcuts defined in :attr:`podpac.data.INTERPOLATION_SHORTCUTS`. The interpolation method associated @@ -95,6 +102,7 @@ class will be used without modification. """ source = NodeTrait(allow_none=True).tag(attr=True) + source_id = tl.Unicode(allow_none=True).tag(attr=True) _source_xr = tl.Instance(UnitsDataArray, allow_none=True) # This is needed for the Interpolation Mixin interpolation = InterpolationTrait().tag(attr=True) diff --git a/podpac/core/interpolation/interpolation_manager.py b/podpac/core/interpolation/interpolation_manager.py index 874fe05bf..25ec66e92 100644 --- a/podpac/core/interpolation/interpolation_manager.py +++ b/podpac/core/interpolation/interpolation_manager.py @@ -7,6 +7,7 @@ import numpy as np import traitlets as tl +from podpac.core import settings from podpac.core.units import UnitsDataArray from podpac.core.coordinates import merge_dims, Coordinates from podpac.core.coordinates.utils import VALID_DIMENSION_NAMES @@ -19,7 +20,7 @@ _logger = logging.getLogger(__name__) -INTERPOLATION_DEFAULT = "nearest" +INTERPOLATION_DEFAULT = settings.settings.get("DEFAULT_INTERPOLATION", "nearest") """str : Default interpolation method used when creating a new :class:`Interpolation` class """ INTERPOLATORS = [NearestNeighbor, XarrayInterpolator, RasterioInterpolator, ScipyPoint, ScipyGrid, NearestPreview] @@ -484,7 +485,10 @@ def select_coordinates(self, source_coordinates, eval_coordinates, index_type="n selected_coords_idx[d] = np.arange(source_coordinates[d].size)[selected_coords_idx[d]] selected_coords = Coordinates( - [selected_coords[k] for k in source_coordinates.dims], source_coordinates.dims, crs=source_coordinates.crs + [selected_coords[k] for k in source_coordinates.dims], + source_coordinates.dims, + crs=source_coordinates.crs, + validate_crs=False, ) if index_type == "numpy": selected_coords_idx2 = np.ix_(*[np.ravel(selected_coords_idx[k]) for k in source_coordinates.dims]) diff --git a/podpac/core/interpolation/interpolator.py b/podpac/core/interpolation/interpolator.py index 9cca5ce4e..1669dbab9 100644 --- a/podpac/core/interpolation/interpolator.py +++ b/podpac/core/interpolation/interpolator.py @@ -71,24 +71,24 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at The alt_scale defines the factor that alt coordinates will be scaled by (coordinates are divided by alt_scale) to output a valid distance for the combined set of dimensions. respect_bounds : bool - Default is True. If True, any requested dimension OUTSIDE of the bounds will be interpolated as 'nan'. - Otherwise, any point outside the bounds will have NN interpolation allowed. + Default is True. If True, any requested dimension OUTSIDE of the bounds will be interpolated as 'nan'. + Otherwise, any point outside the bounds will have NN interpolation allowed. remove_nan: bool Default is False. If True, nan's in the source dataset will NOT be interpolated. This can be used if a value for the function is needed at every point of the request. It is not helpful when computing statistics, where nan values will be explicitly - ignored. In that case, if remove_nan is True, nan values will take on the values of neighbors, skewing the statistical result. + ignored. In that case, if remove_nan is True, nan values will take on the values of neighbors, skewing the statistical result. use_selector: bool - Default is True. If True, a subset of the coordinates will be selected BEFORE the data of a dataset is retrieved. This + Default is True. If True, a subset of the coordinates will be selected BEFORE the data of a dataset is retrieved. This reduces the number of data retrievals needed for large datasets. In cases where remove_nan = True, the selector may select only nan points, in which case the interpolation fails to produce non-nan data. This usually happens when requesting a single - point from a dataset that contains nans. As such, in these cases set use_selector = False to get a non-nan value. + point from a dataset that contains nans. As such, in these cases set use_selector = False to get a non-nan value. """, "interpolator_can_select": """ Evaluate if interpolator can downselect the source coordinates from the requested coordinates for the unstacked dims supplied. If not overwritten, this method returns an empty tuple (``tuple()``) - + Parameters ---------- udims : tuple @@ -97,7 +97,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at Description eval_coordinates : :class:`podpac.Coordinates` Description - + Returns ------- tuple @@ -106,7 +106,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at """, "interpolator_select": """ Downselect coordinates with interpolator method - + Parameters ---------- udims : tuple @@ -117,7 +117,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at Description eval_coordinates : :class:`podpac.Coordinates` Description - + Returns ------- (:class:`podpac.Coordinates`, list) @@ -131,7 +131,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at "interpolator_can_interpolate": """ Evaluate if this interpolation method can handle the requested coordinates and source_coordinates. If not overwritten, this method returns an empty tuple (`tuple()`) - + Parameters ---------- udims : tuple @@ -140,7 +140,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at Description eval_coordinates : :class:`podpac.Coordinates` Description - + Returns ------- tuple @@ -149,7 +149,7 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at """, "interpolator_interpolate": """ Interpolate data from requested coordinates to source coordinates. - + Parameters ---------- udims : tuple @@ -162,11 +162,11 @@ class is constructed. See the :class:`podpac.data.DataSource` `interpolation` at Description output_data : podpac.core.units.UnitsDataArray Description - + Raises ------ NotImplementedError - + Returns ------- podpac.core.units.UnitDataArray diff --git a/podpac/core/interpolation/nearest_neighbor_interpolator.py b/podpac/core/interpolation/nearest_neighbor_interpolator.py index b02dcf2a1..e2fcf6010 100644 --- a/podpac/core/interpolation/nearest_neighbor_interpolator.py +++ b/podpac/core/interpolation/nearest_neighbor_interpolator.py @@ -210,12 +210,6 @@ def _get_stacked_index(self, dim, source, request, bounds=None): # The udims are in the order of the request so that the meshgrid calls will be in the right order udims = [ud for ud in request.udims if ud in source.udims] - # Subselect bounds if needed - if np.any([d not in udims for d in dim.split("_")]): - dims = dim.split("_") - cols = np.array([d in udims for d in dims], dtype=bool) - bounds = bounds[:, cols] - time_source = time_request = None if "time" in udims: time_source = source["time"] @@ -283,7 +277,7 @@ def _get_uniform_index(self, dim, source, request, bounds=None): step = self._time_to_float(source.step, source, request) else: step = source.step - rindex[np.abs(index - rindex) * step > tol] = -1 + rindex[np.abs(index - rindex) * np.abs(step) > tol] = -1 return rindex diff --git a/podpac/core/interpolation/selector.py b/podpac/core/interpolation/selector.py index c6ea006e9..70ffb7824 100644 --- a/podpac/core/interpolation/selector.py +++ b/podpac/core/interpolation/selector.py @@ -98,11 +98,11 @@ def select(self, source_coords, request_coords, index_type="numpy"): coords = [] coords_inds = [] for coord1d in source_coords._coords.values(): - c, ci = self._select1d(coord1d, request_coords, index_type) + ci = self._select1d(coord1d, request_coords, index_type) ci = np.sort(np.unique(ci)) if index_type == "slice": ci = _index2slice(ci) - c = c[ci] + c = coord1d[ci] coords.append(c) coords_inds.append(ci) coords = Coordinates(coords) @@ -122,7 +122,7 @@ def _select1d(self, source, request, index_type): # else: # _logger.info("Coordinates are not subselected for source {} with request {}".format(source, request)) # return source, slice(0, None) - return source, ci + return ci def _merge_indices(self, indices, source_dims, request_dims): # For numpy to broadcast correctly, we have to reshape each of the indices @@ -135,8 +135,12 @@ def _merge_indices(self, indices, source_dims, request_dims): def _select_uniform(self, source, request, index_type): crds = request[source.name] - if crds.is_uniform and crds.step < source.step and not request.is_stacked(source.name): - return np.arange(source.size) + if crds.is_uniform and abs(crds.step) < abs(source.step) and not request.is_stacked(source.name): + start_ind = np.clip((crds.start - source.start) / source.step, 0, source.size) + end_ind = np.clip((crds.stop - source.start) / source.step, 0, source.size) + start = int(np.floor(max(0, min(start_ind, end_ind) + min(self.method) - 1e-6))) + stop = int(np.ceil(min(source.size, max(start_ind, end_ind) + max(self.method) + 1 + 1e-6))) + return np.arange(start, stop) index = (crds.coordinates - source.start) / source.step stop_ind = source.size - 1 @@ -146,13 +150,12 @@ def _select_uniform(self, source, request, index_type): # In this case, floating point error really matters, so we have to do a test up = np.round(index - 1e-6) down = np.round(index + 1e-6) - # When up and down do not agree, use the index that will be kept. + # When up and down do not agree, make sure both the indices that will be kept. index_mid = down # arbitrarily default to down when both satisfy criteria Iup = up != down Iup[Iup] = (up[Iup] >= 0) & (up[Iup] <= stop_ind) & ((up[Iup] > down.max()) | (up[Iup] < down.min())) index_mid[Iup] = up[Iup] flr_ceil = {0: index_mid} - # flr_ceil = {0: index} inds = [] for m in self.method: diff --git a/podpac/core/interpolation/test/test_selector.py b/podpac/core/interpolation/test/test_selector.py index 6ad199974..4ebb380a9 100644 --- a/podpac/core/interpolation/test/test_selector.py +++ b/podpac/core/interpolation/test/test_selector.py @@ -149,6 +149,81 @@ def test_bilinear_selector(self): ), ) + def test_bilinear_selector_negative_step(self): + selector = Selector("bilinear") + request1 = Coordinates([clinspace(-0.5, -1, 11)], ["lat"]) + request2 = Coordinates([clinspace(-1, -0.5, 11)], ["lat"]) + source1 = Coordinates([clinspace(-2, 0, 100)], ["lat"]) + source2 = Coordinates([clinspace(0, -2, 100)], ["lat"]) + c11, ci11 = selector.select(source1, request1) + assert len(c11["lat"]) == 22 + assert len(ci11[0]) == 22 + + c12, ci12 = selector.select(source1, request2) + assert len(c12["lat"]) == 22 + assert len(ci12[0]) == 22 + + c21, ci21 = selector.select(source2, request1) + assert len(c21["lat"]) == 22 + assert len(ci21[0]) == 22 + + c22, ci22 = selector.select(source2, request2) + assert len(c22["lat"]) == 22 + assert len(ci22[0]) == 22 + + np.testing.assert_equal(ci11[0], ci12[0]) + np.testing.assert_equal(ci21[0], ci22[0]) + + def test_nearest_selector_negative_step(self): + selector = Selector("nearest") + request1 = Coordinates([clinspace(-0.5, -1, 11)], ["lat"]) + request2 = Coordinates([clinspace(-1, -0.5, 11)], ["lat"]) + source1 = Coordinates([clinspace(-2, 0, 100)], ["lat"]) + source2 = Coordinates([clinspace(0, -2, 100)], ["lat"]) + c11, ci11 = selector.select(source1, request1) + assert len(c11["lat"]) == 11 + assert len(ci11[0]) == 11 + + c12, ci12 = selector.select(source1, request2) + assert len(c12["lat"]) == 11 + assert len(ci12[0]) == 11 + + c21, ci21 = selector.select(source2, request1) + assert len(c21["lat"]) == 11 + assert len(ci21[0]) == 11 + + c22, ci22 = selector.select(source2, request2) + assert len(c22["lat"]) == 11 + assert len(ci22[0]) == 11 + + np.testing.assert_equal(ci11[0], ci12[0]) + np.testing.assert_equal(ci21[0], ci22[0]) + + def test_nearest_selector_negative_time_step(self): + selector = Selector("nearest") + request1 = Coordinates([clinspace("2020-01-01", "2020-01-11", 11)], ["time"]) + request2 = Coordinates([clinspace("2020-01-11", "2020-01-01", 11)], ["time"]) + source1 = Coordinates([clinspace("2020-01-22T00", "2020-01-01T00", 126)], ["time"]) + source2 = Coordinates([clinspace("2020-01-01T00", "2020-01-22T00", 126)], ["time"]) + c11, ci11 = selector.select(source1, request1) + assert len(c11["time"]) == 11 + assert len(ci11[0]) == 11 + + c12, ci12 = selector.select(source1, request2) + assert len(c12["time"]) == 11 + assert len(ci12[0]) == 11 + + c21, ci21 = selector.select(source2, request1) + assert len(c21["time"]) == 11 + assert len(ci21[0]) == 11 + + c22, ci22 = selector.select(source2, request2) + assert len(c22["time"]) == 11 + assert len(ci22[0]) == 11 + + np.testing.assert_equal(ci11[0], ci12[0]) + np.testing.assert_equal(ci21[0], ci22[0]) + def test_nn_selector(self): selector = Selector("nearest") for request in self.coords: diff --git a/podpac/core/interpolation/xarray_interpolator.py b/podpac/core/interpolation/xarray_interpolator.py index b1e8913bd..2a5eab9a8 100644 --- a/podpac/core/interpolation/xarray_interpolator.py +++ b/podpac/core/interpolation/xarray_interpolator.py @@ -116,7 +116,8 @@ def interpolate(self, udims, source_coordinates, source_data, eval_coordinates, for d in source_coordinates.dims: if not np.any(np.isnan(source_data)): break - source_data = source_data.interpolate_na(method=self.method, dim=d) + # use_coordinate=False allows for interpolation when dimension is not monotonically increasing + source_data = source_data.interpolate_na(method=self.method, dim=d, use_coordinate=False) if self.method == "bilinear": self.method = "linear" diff --git a/podpac/core/node.py b/podpac/core/node.py index db60b4d8b..8929efd5b 100644 --- a/podpac/core/node.py +++ b/podpac/core/node.py @@ -26,6 +26,7 @@ from podpac.core.utils import cached_property from podpac.core.utils import trait_is_defined from podpac.core.utils import _get_query_params_from_url, _get_from_url, _get_param +from podpac.core.utils import probe_node from podpac.core.coordinates import Coordinates from podpac.core.style import Style from podpac.core.cache import CacheCtrl, get_default_cache_ctrl, make_cache_ctrl, S3CacheStore, DiskCacheStore @@ -309,6 +310,9 @@ def eval(self, coordinates, **kwargs): # Add style information data.attrs["layer_style"] = self.style + if self.units is not None: + data.attrs["units"] + # Add crs if it is missing if "crs" not in data.attrs: data.attrs["crs"] = coordinates.crs @@ -386,6 +390,38 @@ def create_output_array(self, coords, data=np.nan, attrs=None, **kwargs): def trait_is_defined(self, name): return trait_is_defined(self, name) + def probe(self, lat=None, lon=None, time=None, alt=None, crs=None): + """Evaluates every part of a node / pipeline at a point and records + which nodes are actively being used. + + Parameters + ------------ + lat : float, optional + Default is None. The latitude location + lon : float, optional + Default is None. The longitude location + time : float, np.datetime64, optional + Default is None. The time + alt : float, optional + Default is None. The altitude location + crs : str, optional + Default is None. The CRS of the request. + + Returns + dict + A dictionary that contains the following for each node: + ``` + { + "active": bool, # If the node is being used or not + "value": float, # The value of the node evaluated at that point + "inputs": list, # List of names of input nodes (based on definition) + "name": str, # node.style.name or self.base_ref if the style name is empty + "node_hash": str, # The node's hash + } + ``` + """ + return probe_node(self, lat, lon, time, alt, crs) + # ----------------------------------------------------------------------------------------------------------------- # Serialization # ----------------------------------------------------------------------------------------------------------------- @@ -424,8 +460,16 @@ def _base_definition(self): if ( isinstance(value, Node) - or (isinstance(value, (list, tuple, np.ndarray)) and all(isinstance(elem, Node) for elem in value)) - or (isinstance(value, dict) and all(isinstance(elem, Node) for elem in value.values())) + or ( + isinstance(value, (list, tuple, np.ndarray)) + and (len(value) > 0) + and all(isinstance(elem, Node) for elem in value) + ) + or ( + isinstance(value, dict) + and (len(value) > 0) + and all(isinstance(elem, Node) for elem in value.values()) + ) ): inputs[name] = value else: @@ -543,7 +587,7 @@ def json_pretty(self): return json.dumps(self.definition, indent=4, cls=JSONEncoder) - @property + @cached_property def hash(self): """ hash for this node, used in caching and to determine equality. """ @@ -900,21 +944,52 @@ def from_url(cls, url): p = _get_param(query_params, "PARAMS") if p is None: p = "{}" - p = json.loads(p) - definition = {} - # If one of the special names are in the params list, then add params to the root layer - if "node" in p or "plugin" in p or "style" in p or "attrs" in p: - definition.update(p) - else: - definition["attrs"] = p - definition.update({"node": layer}) # The user-specified node name ALWAYS takes precidence. - d = OrderedDict({layer.replace(".", "-"): definition}) + if not isinstance(p, dict): + p = json.loads(p) + return cls.from_name_params(layer, p) if d is None: d = json.loads(s, object_pairs_hook=OrderedDict) return cls.from_definition(d) + @classmethod + def from_name_params(cls, name, params=None): + """ + Create podpac Node from a WMS/WCS request. + + Arguments + --------- + name : str + The name of the PODPAC Node / Layer + params : dict, optional + Default is None. Dictionary of parameters to modify node attributes, style, or completely/partially define the node. + This dictionary can either be a `Node.definition` or `Node.definition['attrs']`. Node, the specified `name` always + take precidence over anything defined in `params` (e.g. params['node'] won't be used). + + Returns + ------- + :class:`Node` + A full Node with sub-nodes based on the definition of the node from the node name and parameters + + """ + layer = name + p = params + + d = None + if p is None: + p = {} + definition = {} + # If one of the special names are in the params list, then add params to the root layer + if "node" in p or "plugin" in p or "style" in p or "attrs" in p: + definition.update(p) + else: + definition["attrs"] = p + definition.update({"node": layer}) # The user-specified node name ALWAYS takes precidence. + d = OrderedDict({layer.replace(".", "-"): definition}) + + return cls.from_definition(d) + def _lookup_input(nodes, name, value): # containers diff --git a/podpac/core/settings.py b/podpac/core/settings.py index af7860ad1..1ba005691 100644 --- a/podpac/core/settings.py +++ b/podpac/core/settings.py @@ -7,6 +7,7 @@ from copy import deepcopy import errno import uuid +import logging from podpac import version @@ -16,6 +17,8 @@ except ImportError: JSONDecodeError = ValueError +_logger = logging.getLogger(__name__) + # Settings Defaults DEFAULT_SETTINGS = { # podpac core settings @@ -30,9 +33,10 @@ "N_THREADS": 8, "CHUNK_SIZE": None, # Size of chunks for parallel processing or large arrays that do not fit in memory "ENABLE_UNITS": True, - "DEFAULT_CRS": "+proj=longlat +datum=WGS84 +no_defs +vunits=m", # EPSG:4326 with vertical units as meters "PODPAC_VERSION": version.semver(), "UNSAFE_EVAL_HASH": uuid.uuid4().hex, # unique id for running unsafe evaluations + "DEFAULT_CRS": "+proj=longlat +datum=WGS84 +no_defs +vunits=m", # EPSG:4326 with vertical units as meters + "DEFAULT_INTERPOLATION": "nearest", # cache "DEFAULT_CACHE": ["ram"], "CACHE_DATASOURCE_OUTPUT_DEFAULT": True, @@ -371,6 +375,12 @@ def allow_unsafe_eval(self): return "PODPAC_UNSAFE_EVAL" in os.environ and os.environ["PODPAC_UNSAFE_EVAL"] == self["UNSAFE_EVAL_HASH"] def set_unsafe_eval(self, allow=False): + _logger.warning( + "DEPRECATION WARNING: The `set_unsafe_eval` method has been deprecated and will be removed in future versions of PODPAC. Use `allow_unrestricted_code_execuation` instead. " + ) + self.allow_unrestricted_code_execution(allow) + + def allow_unrestricted_code_execution(self, allow=False): """Allow unsafe evaluation for this podpac environment Parameters @@ -380,6 +390,9 @@ def set_unsafe_eval(self, allow=False): """ if allow: os.environ["PODPAC_UNSAFE_EVAL"] = self["UNSAFE_EVAL_HASH"] + _logger.warning( + "Setting unrestricted code execution can results in vulnerabilities on publically accessible servers. Use with caution." + ) else: if "PODPAC_UNSAFE_EVAL" in os.environ: os.environ.pop("PODPAC_UNSAFE_EVAL") diff --git a/podpac/core/style.py b/podpac/core/style.py index a3ccaa62d..b93d8388d 100644 --- a/podpac/core/style.py +++ b/podpac/core/style.py @@ -12,6 +12,9 @@ from podpac.core.units import ureg from podpac.core.utils import trait_is_defined, JSONEncoder, TupleTrait +DEFAULT_ENUMERATION_LEGEND = "unknown" +DEFAULT_ENUMERATION_COLOR = (0.2, 0.2, 0.2) + class Style(tl.HasTraits): """Summary @@ -28,9 +31,9 @@ class Style(tl.HasTraits): matplotlib colormap name cmap : matplotlib.cm.ColorMap matplotlib colormap property - enumeration_colors : tuple + enumeration_colors : dict data colors (replaces colormap/cmap) - enumeration_legend : tuple + enumeration_legend : dict data legend, should correspond with enumeration_colors """ @@ -44,8 +47,10 @@ def __init__(self, node=None, *args, **kwargs): units = tl.Unicode(allow_none=True) clim = tl.List(default_value=[None, None]) colormap = tl.Unicode(allow_none=True, default_value=None) - enumeration_legend = TupleTrait(trait=tl.Unicode()) - enumeration_colors = tl.Tuple() + enumeration_legend = tl.Dict(key_trait=tl.Int(), value_trait=tl.Unicode(), default_value=None, allow_none=True) + enumeration_colors = tl.Dict(key_trait=tl.Int(), default_value=None, allow_none=True) + default_enumeration_legend = tl.Unicode(default_value=DEFAULT_ENUMERATION_LEGEND) + default_enumeration_color = tl.Any(default_value=DEFAULT_ENUMERATION_COLOR) @tl.validate("colormap") def _validate_colormap(self, d): @@ -57,16 +62,47 @@ def _validate_colormap(self, d): @tl.validate("enumeration_colors") def _validate_enumeration_colors(self, d): - if d["value"] and self.colormap: + enum_colors = d["value"] + if enum_colors and self.colormap: raise TypeError("Style can have a colormap or enumeration_colors, but not both") - return d["value"] + return enum_colors + + @tl.validate("enumeration_legend") + def _validate_enumeration_legend(self, d): + # validate against enumeration_colors + enum_legend = d["value"] + if not self.enumeration_colors: + raise TypeError("Style enumeration_legend requires enumeration_colors") + if set(enum_legend) != set(self.enumeration_colors): + raise ValueError("Style enumeration_legend keys must match enumeration_colors keys") + return enum_legend + + @property + def full_enumeration_colors(self): + """ Convert enumeration_colors into a tuple suitable for matplotlib ListedColormap. """ + return tuple( + [ + self.enumeration_colors.get(value, self.default_enumeration_color) + for value in range(min(self.enumeration_colors), max(self.enumeration_colors) + 1) + ] + ) + + @property + def full_enumeration_legend(self): + """ Convert enumeration_legend into a tuple suitable for matplotlib. """ + return tuple( + [ + self.enumeration_legend.get(value, self.default_enumeration_legend) + for value in range(min(self.enumeration_legend), max(self.enumeration_legend) + 1) + ] + ) @property def cmap(self): if self.colormap: return matplotlib.cm.get_cmap(self.colormap) elif self.enumeration_colors: - return ListedColormap(self.enumeration_colors) + return ListedColormap(self.full_enumeration_colors) else: return matplotlib.cm.get_cmap("viridis") @@ -96,12 +132,21 @@ def definition(self): d["enumeration_legend"] = self.enumeration_legend if self.enumeration_colors: d["enumeration_colors"] = self.enumeration_colors + if self.default_enumeration_legend != DEFAULT_ENUMERATION_LEGEND: + d["default_enumeration_legend"] = self.default_enumeration_legend + if self.default_enumeration_color != DEFAULT_ENUMERATION_COLOR: + d["default_enumeration_color"] = self.default_enumeration_color if self.clim != [None, None]: d["clim"] = self.clim return d @classmethod def from_definition(cls, d): + # parse enumeration keys to int + if "enumeration_colors" in d: + d["enumeration_colors"] = {int(key): value for key, value in d["enumeration_colors"].items()} + if "enumeration_legend" in d: + d["enumeration_legend"] = {int(key): value for key, value in d["enumeration_legend"].items()} return cls(**d) @classmethod diff --git a/podpac/core/test/test_node.py b/podpac/core/test/test_node.py index 1f220c516..53ba91597 100644 --- a/podpac/core/test/test_node.py +++ b/podpac/core/test/test_node.py @@ -940,6 +940,23 @@ def test_from_url_with_plugin_style_params(self): node = Node.from_url(url0) node = Node.from_url(url1) + def test_from_name_params(self): + # Normal + name = "algorithm.Arange" + node = Node.from_name_params(name) + + # Normal with params + name = "algorithm.CoordData" + params = {"coord_name": "alt"} + node = Node.from_name_params(name, params) + assert node.coord_name == "alt" + + # Plugin style + name = "CoordData" + params = {"plugin": "podpac.algorithm", "attrs": {"coord_name": "alt"}} + node = Node.from_name_params(name, params) + assert node.coord_name == "alt" + def test_style(self): node = podpac.data.Array( source=[10, 20, 30], diff --git a/podpac/core/test/test_style.py b/podpac/core/test/test_style.py index 1b057e6cf..b5357d241 100644 --- a/podpac/core/test/test_style.py +++ b/podpac/core/test/test_style.py @@ -24,12 +24,38 @@ def test_cmap(self): style = Style(colormap="cividis") assert style.cmap.name == "cividis" - style = Style(enumeration_colors=("c", "k")) + style = Style(enumeration_colors=({0: "c", 1: "k"})) assert style.cmap.name == "from_list" assert style.cmap.colors == ("c", "k") with pytest.raises(TypeError, match="Style can have a colormap or enumeration_colors"): - style = Style(colormap="cividis", enumeration_colors=("c", "k")) + style = Style(colormap="cividis", enumeration_colors=({0: "c", 1: "k"})) + + def test_enumeration(self): + # matplotlib enumeration tuples + style = Style( + enumeration_colors={1: "r", 3: "o"}, + enumeration_legend={1: "apples", 3: "oranges"}, + default_enumeration_color="k", + ) + assert style.full_enumeration_colors == ("r", "k", "o") + assert style.full_enumeration_legend == ("apples", "unknown", "oranges") + + # negative key + style = Style( + enumeration_colors={-1: "r", 1: "o"}, + enumeration_legend={-1: "apples", 1: "oranges"}, + default_enumeration_color="k", + ) + assert style.full_enumeration_colors == ("r", "k", "o") + assert style.full_enumeration_legend == ("apples", "unknown", "oranges") + + # invalid + with pytest.raises(ValueError, match="Style enumeration_legend keys must match enumeration_colors keys"): + style = Style(enumeration_colors={1: "r", 3: "o"}, enumeration_legend={1: "apples"}) + + with pytest.raises(TypeError, match="Style enumeration_legend requires enumeration_colors"): + style = Style(enumeration_legend={-1: "apples", 3: "oranges"}) def test_serialization(self): # default @@ -58,12 +84,12 @@ def test_serialization(self): assert s.clim == style.clim # enumeration traits - style = Style(enumeration_legend=("apples", "oranges"), enumeration_colors=["r", "o"]) + style = Style(enumeration_legend=({0: "apples", 1: "oranges"}), enumeration_colors=({0: "r", 1: "o"})) d = style.definition assert isinstance(d, OrderedDict) assert set(d.keys()) == {"enumeration_legend", "enumeration_colors"} - assert d["enumeration_legend"] == ("apples", "oranges") - assert d["enumeration_colors"] == ("r", "o") + assert d["enumeration_legend"] == {0: "apples", 1: "oranges"} + assert d["enumeration_colors"] == {0: "r", 1: "o"} s = Style.from_json(style.json) assert s.enumeration_legend == style.enumeration_legend diff --git a/podpac/core/test/test_units.py b/podpac/core/test/test_units.py index 494438ca7..54414d13c 100644 --- a/podpac/core/test/test_units.py +++ b/podpac/core/test/test_units.py @@ -320,7 +320,6 @@ def test_to_image_vmin_vmax(self): assert isinstance(uda.to_image(vmin=0, vmax=2, return_base64=True), bytes) assert isinstance(uda.to_image(vmin=0, vmax=2), io.BytesIO) - @pytest.mark.skip(reason="Error in xarray layer") def test_ufuncs(self): a1 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"], attrs={"units": ureg.meter}) a2 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"], attrs={"units": ureg.kelvin}) @@ -336,6 +335,30 @@ def test_ufuncs(self): np.std(a1) np.var(a1) + def test_keep_attrs(self): + # This tests #265 + # Create Nodes to use the convience methods for making units data arrays + a1 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"], attrs={"units": ureg.meter, "test": "test"}) + a2 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"], attrs={"units": ureg.yard}) + + assert "test" in (a1 + a2).attrs + assert "test" in (a1 * a2).attrs + + # No units + a1 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"], attrs={"test": "test"}) + a2 = UnitsDataArray(np.ones((4, 3)), dims=["lat", "lon"]) + + assert "test" in (a1 + 1).attrs + assert "test" in (a1 + a2).attrs + assert "test" in (a1 * 1).attrs + assert "test" in (a1 * a2).attrs + + # Order is important + assert "test" not in (1 + a1).attrs + assert "test" not in (a2 + a1).attrs + assert "test" not in (1 * a1).attrs + assert "test" not in (a2 * a1).attrs + class TestCreateDataArray(object): @classmethod diff --git a/podpac/core/test/test_utils.py b/podpac/core/test/test_utils.py index a34a7a144..481d31511 100644 --- a/podpac/core/test/test_utils.py +++ b/podpac/core/test/test_utils.py @@ -1,4 +1,5 @@ from __future__ import division, unicode_literals, print_function, absolute_import +from io import StringIO import os import sys @@ -21,6 +22,7 @@ from podpac.core.utils import JSONEncoder, is_json_serializable from podpac.core.utils import cached_property from podpac.core.utils import ind2slice +from podpac.core.utils import probe_node class TestCommonDocs(object): @@ -467,3 +469,241 @@ def test_nontuple(self): assert ind2slice([1, 2, 4]) == slice(1, 5) assert ind2slice([False, True, True, False, True, False]) == slice(1, 5) assert ind2slice([1, 3, 5]) == slice(1, 7, 2) + + +class AnotherOne(podpac.algorithm.Algorithm): + def algorithm(self, inputs, coordinates): + return self.create_output_array(coordinates, data=1) + + +class TestNodeProber(object): + coords = podpac.Coordinates([podpac.clinspace(0, 2, 3, "lat"), podpac.clinspace(0, 2, 3, "lon")]) + one = podpac.data.Array( + source=np.ones((3, 3)), coordinates=coords, style=podpac.style.Style(name="one_style", units="o") + ) + two = podpac.data.Array( + source=np.ones((3, 3)) * 2, coordinates=coords, style=podpac.style.Style(name="two_style", units="t") + ) + arange = podpac.algorithm.Arange() + nan = podpac.data.Array(source=np.ones((3, 3)) * np.nan, coordinates=coords) + another_one = AnotherOne() + + def test_single_prober(self): + expected = { + "Array": { + "active": True, + "value": 1, + "units": "o", + "inputs": [], + "name": "one_style", + "node_hash": self.one.hash, + } + } + out = probe_node(self.one, lat=1, lon=1) + assert out == expected + + def test_serial_prober(self): + with podpac.settings: + podpac.settings.set_unsafe_eval(True) + a = podpac.algorithm.Arithmetic(one=self.one, eqn="one * 2") + b = podpac.algorithm.Arithmetic(a=a, eqn="a*3", style=podpac.style.Style(name="six_style", units="m")) + expected = { + "Array": { + "active": True, + "value": 1.0, + "units": "o", + "inputs": [], + "name": "one_style", + "node_hash": self.one.hash, + }, + "Arithmetic": { + "active": True, + "value": 2.0, + "units": "", + "inputs": ["Array"], + "name": "Arithmetic", + "node_hash": a.hash, + }, + "Arithmetic_1": { + "active": True, + "value": 6.0, + "units": "m", + "inputs": ["Arithmetic"], + "name": "six_style", + "node_hash": b.hash, + }, + } + out = probe_node(b, lat=1, lon=1) + assert out == expected + + def test_parallel_prober(self): + with podpac.settings: + podpac.settings.set_unsafe_eval(True) + a = podpac.algorithm.Arithmetic(one=self.one, two=self.two, eqn="one * two") + expected = { + "Array": { + "active": True, + "value": 1.0, + "units": "o", + "inputs": [], + "name": "one_style", + "node_hash": self.one.hash, + }, + "Array_1": { + "active": True, + "value": 2.0, + "units": "t", + "inputs": [], + "name": "two_style", + "node_hash": self.two.hash, + }, + "Arithmetic": { + "active": True, + "value": 2.0, + "units": "", + "inputs": ["Array", "Array_1"], + "name": "Arithmetic", + "node_hash": a.hash, + }, + } + out = probe_node(a, lat=1, lon=1) + assert out == expected + + def test_composited_prober(self): + a = podpac.compositor.OrderedCompositor(sources=[self.one, self.arange]) + expected = { + "Array": { + "active": True, + "value": 1.0, + "units": "o", + "inputs": [], + "name": "one_style", + "node_hash": self.one.hash, + }, + "Arange": { + "active": False, + "value": 0.0, + "units": "", + "inputs": [], + "name": "Arange", + "node_hash": self.arange.hash, + }, + "OrderedCompositor": { + "active": True, + "value": 1.0, + "units": "", + "inputs": ["Array", "Arange"], + "name": "OrderedCompositor", + "node_hash": a.hash, + }, + } + out = probe_node(a, lat=1, lon=1) + assert out == expected + + a = podpac.compositor.OrderedCompositor(sources=[self.nan, self.two]) + expected = { + "Array": { + "active": False, + "value": "nan", + "units": "", + "inputs": [], + "name": "Array", + "node_hash": self.nan.hash, + }, + "Array_1": { + "active": True, + "value": 2.0, + "units": "t", + "inputs": [], + "name": "two_style", + "node_hash": self.two.hash, + }, + "OrderedCompositor": { + "active": True, + "value": 2.0, + "units": "", + "inputs": ["Array", "Array_1"], + "name": "OrderedCompositor", + "node_hash": a.hash, + }, + } + out = probe_node(a, lat=1, lon=1) + for k in out: + if np.isnan(out[k]["value"]): + out[k]["value"] = "nan" + assert out == expected + + a = podpac.compositor.OrderedCompositor(sources=[self.nan, self.one, self.another_one]) + expected = { + "Array": { + "active": False, + "value": "nan", + "units": "", + "inputs": [], + "name": "Array", + "node_hash": self.nan.hash, + }, + "Array_1": { + "active": True, + "value": 1.0, + "units": "o", + "inputs": [], + "name": "one_style", + "node_hash": self.one.hash, + }, + "AnotherOne": { + "active": False, + "value": 1.0, + "units": "", + "inputs": [], + "name": "AnotherOne", + "node_hash": self.another_one.hash, + }, + "OrderedCompositor": { + "active": True, + "value": 1.0, + "units": "", + "inputs": ["Array", "Array_1", "AnotherOne"], + "name": "OrderedCompositor", + "node_hash": a.hash, + }, + } + out = probe_node(a, lat=1, lon=1) + for k in out: + if np.isnan(out[k]["value"]): + out[k]["value"] = "nan" + assert out == expected + + def test_composited_prober_nested(self): + a = podpac.compositor.OrderedCompositor( + sources=[self.one, self.arange], style=podpac.style.Style(name="composited", units="c") + ) + expected = { + "name": "composited", + "value": "1.0 c", + "active": True, + "node_id": a.hash, + "params": {}, + "inputs": { + "inputs": [ + { + "name": "one_style", + "value": "1.0 o", + "active": True, + "node_id": self.one.hash, + "params": {}, + "inputs": {}, + }, + { + "name": "Arange", + "value": "0.0", + "active": False, + "node_id": self.arange.hash, + "params": {}, + "inputs": {}, + }, + ] + }, + } + out = probe_node(a, lat=1, lon=1, nested=True) + assert out == expected diff --git a/podpac/core/units.py b/podpac/core/units.py index 2f1fc44ac..3573ed8e8 100644 --- a/podpac/core/units.py +++ b/podpac/core/units.py @@ -181,6 +181,7 @@ def to_format(self, format, *args, **kwargs): r = self.to_image(format, *args, **kwargs) elif format.upper() in ["TIFF", "TIF", "GEOTIFF"]: r = self.to_geotiff(*args, **kwargs) + elif format in ["pickle", "pkl"]: r = cPickle.dumps(self) elif format == "zarr_part": @@ -235,11 +236,11 @@ def to_image(self, format="png", vmin=None, vmax=None, return_base64=False): """ return to_image(self, format, vmin, vmax, return_base64) - def to_geotiff(self, fp, geotransform=None, crs=None, **kwargs): + def to_geotiff(self, fp=None, geotransform=None, crs=None, **kwargs): """ For documentation, see `core.units.to_geotiff` """ - to_geotiff(fp, self, geotransform=geotransform, crs=crs, **kwargs) + return to_geotiff(fp, self, geotransform=geotransform, crs=crs, **kwargs) def _pp_serialize(self): if self.attrs.get("units"): @@ -474,7 +475,12 @@ def create(cls, c, data=np.nan, outputs=None, dtype=float, **kwargs): def make_func(meth, tp): def func(self, other): x = getattr(super(UnitsDataArray, self), meth)(other) - return self._apply_binary_op_to_units(getattr(operator, tp), other, x) + x2 = self._apply_binary_op_to_units(getattr(operator, tp), other, x) + units = x2.attrs.get("units") + x2.attrs = self.attrs + if units is not None: + x2.attrs["units"] = units + return x2 return func @@ -490,7 +496,9 @@ def make_func(meth, tp): def func(self, other): multiplier = self._get_unit_multiplier(other) x = getattr(super(UnitsDataArray, self), meth)(other * multiplier) - return self._apply_binary_op_to_units(getattr(operator, tp), other, x) + x2 = self._apply_binary_op_to_units(getattr(operator, tp), other, x) + x2.attrs = self.attrs + return x2 return func @@ -621,7 +629,8 @@ def to_geotiff(fp, data, geotransform=None, crs=None, **kwargs): Params ------- fp: str, file object or pathlib.Path object - A filename or URL, a file object opened in binary ('rb') mode, or a Path object. + A filename or URL, a file object opened in binary ('rb') mode, or a Path object. If not supplied, the results will + be written to a memfile object data: UnitsDataArray, xr.DataArray, np.ndarray The data to be saved. If there is more than 1 band, this should be the last dimension of the array. If given a np.ndarray, ensure that the 'lat' dimension is aligned with the rows of the data, with an appropriate @@ -640,15 +649,20 @@ def to_geotiff(fp, data, geotransform=None, crs=None, **kwargs): dtype=data.dtype mode="w" + Returns + -------- + MemoryFile, list + If fp is given, results a list of the results for writing to each band r.append(dst.write(data[..., i], i + 1)) + If fp is None, returns the MemoryFile object """ # This only works for data that essentially has lat/lon only dims = data.coords.dims if "lat" not in dims or "lon" not in dims: raise NotImplementedError("Cannot export GeoTIFF for dataset with lat/lon coordinates.") - if "time" in dims and len(data.coords["time"] > 1): + if "time" in dims and len(data.coords["time"]) > 1: raise NotImplemented("Cannot export GeoTIFF for dataset with multiple times,") - if "alt" in dims and len(data.coords["alt"] > 1): + if "alt" in dims and len(data.coords["alt"]) > 1: raise NotImplemented("Cannot export GeoTIFF for dataset with multiple altitudes.") # TODO: add proper checks, etc. to make sure we handle edge cases and throw errors when we cannot support @@ -669,12 +683,15 @@ def to_geotiff(fp, data, geotransform=None, crs=None, **kwargs): # if isinstance(data, xr.DataArray) and data.dims.index('lat') > data.dims.index('lon'): # geotransform = geotransform[3:] + geotransform[:3] if geotransform is None: - raise ValueError( - "The `geotransform` of the data needs to be provided to save as GeoTIFF. If the geotransform attribute " - "wasn't automatically populated as part of the dataset, it means that the data is in a non-uniform " - "coordinate system. This can sometimes happen when the data is transformed to a different CRS than the " - "native CRS, which can cause the coordinates to seems non-uniform due to floating point precision. " - ) + try: + geotransform = Coordinates.from_xarray(data).geotransform + except (TypeError, AttributeError): + raise ValueError( + "The `geotransform` of the data needs to be provided to save as GeoTIFF. If the geotransform attribute " + "wasn't automatically populated as part of the dataset, it means that the data is in a non-uniform " + "coordinate system. This can sometimes happen when the data is transformed to a different CRS than the " + "native CRS, which can cause the coordinates to seems non-uniform due to floating point precision. " + ) # Make all types into a numpy array if isinstance(data, xr.DataArray): @@ -698,14 +715,21 @@ def to_geotiff(fp, data, geotransform=None, crs=None, **kwargs): dtype=data.dtype, crs=crs, transform=geotransform, - mode="w", ) kwargs2.update(kwargs) # Write the file - r = [] - with rasterio.open(fp, **kwargs2) as dst: - for i in range(data.shape[2]): - r.append(dst.write(data[..., i], i + 1)) + if fp is None: + # Write to memory file + r = rasterio.io.MemoryFile() + with r.open(**kwargs2) as dst: + for i in range(data.shape[2]): + dst.write(data[..., i], i + 1) + else: + r = [] + kwargs2["mode"] = "w" + with rasterio.open(fp, **kwargs2) as dst: + for i in range(data.shape[2]): + r.append(dst.write(data[..., i], i + 1)) return r diff --git a/podpac/core/utils.py b/podpac/core/utils.py index a2a4ee7e0..c87ea13f0 100644 --- a/podpac/core/utils.py +++ b/podpac/core/utils.py @@ -26,6 +26,7 @@ import numpy as np import xarray as xr import pandas as pd # Core dependency of xarray +import pyproj # Optional Imports requests = lazy_import.lazy_module("requests") @@ -459,3 +460,124 @@ def _ind2slice(I): # non-stepped slice return slice(I.min(), I.max() + 1) + + +def resolve_bbox_order(bbox, crs, size): + """ + Utility that puts the OGC WMS/WCS BBox in the order specified by the CRS. + """ + crs = pyproj.CRS(crs) + r = 1 + if crs.axis_info[0].direction != "north": + r = -1 + lat_start, lon_start = bbox[:2][::r] + lat_stop, lon_stop = bbox[2::][::r] + size = size[::r] + + return {"lat": [lat_start, lat_stop, size[0]], "lon": [lon_start, lon_stop, size[1]]} + + +def probe_node(node, lat=None, lon=None, time=None, alt=None, crs=None, nested=False): + """Evaluates every part of a node / pipeline at a point and records + which nodes are actively being used. + + Parameters + ------------ + node : podpac.Node + A PODPAC Node instance + lat : float, optional + Default is None. The latitude location + lon : float, optional + Default is None. The longitude location + time : float, np.datetime64, optional + Default is None. The time + alt : float, optional + Default is None. The altitude location + crs : str, optional + Default is None. The CRS of the request. + nested : bool, optional + Default is False. If True, will return a nested version of the + output dictionary isntead + + Returns + dict + A dictionary that contains the following for each node: + ``` + { + "active": bool, # If the node is being used or not + "value": float, # The value of the node evaluated at that point + "inputs": list, # List of names of input nodes (based on definition) + "name": str, # node.style.name + "node_hash": str, # The node's hash or node.base_ref + } + ``` + """ + + def partial_definition(key, definition): + """ Needed to build partial node definitions """ + new_def = OrderedDict() + for k in definition: + new_def[k] = definition[k] + if k == key: + return new_def + + def flatten_list(l): + """ Needed to flatten the inputs list for all the dependencies """ + nl = [] + for ll in l: + if isinstance(ll, list): + lll = flatten_list(ll) + nl.extend(lll) + else: + nl.append(ll) + return nl + + def get_entry(key, out, definition): + """ Needed for the nested version of the pipeline """ + # We have to rearrange the outputs + entry = OrderedDict() + entry["name"] = out[key]["name"] + entry["value"] = str(out[key]["value"]) + if out[key]["units"] not in [None, ""]: + entry["value"] = entry["value"] + " " + str(out[key]["units"]) + entry["active"] = out[key]["active"] + entry["node_id"] = out[key]["node_hash"] + entry["params"] = {} + entry["inputs"] = {"inputs": [get_entry(inp, out, definition) for inp in out[key]["inputs"]]} + if len(entry["inputs"]["inputs"]) == 0: + entry["inputs"] = {} + return entry + + c = [(v, d) for v, d in zip([lat, lon, time, alt], ["lat", "lon", "time", "alt"]) if v is not None] + coords = podpac.Coordinates([[v[0]] for v in c], [[d[1]] for d in c], crs=crs) + v = float(node.eval(coords)) + definition = node.definition + out = OrderedDict() + for key in definition: + if key == "podpac_version": + continue + d = partial_definition(key, definition) + n = podpac.Node.from_definition(d) + value = float(n.eval(coords)) + inputs = flatten_list(list(d[key].get("inputs", {}).values())) + active = True + out[key] = { + "active": active, + "value": value, + "units": n.style.units, + "inputs": inputs, + "name": n.style.name if n.style.name else key, + "node_hash": n.hash, + } + # Fix sources for Compositors + if isinstance(n, podpac.compositor.OrderedCompositor): + searching_for_active = True + for inp in inputs: + out[inp]["active"] = False + if out[inp]["value"] == out[key]["value"] and np.isfinite(out[inp]["value"]) and searching_for_active: + out[inp]["active"] = True + searching_for_active = False + + if nested: + out = get_entry(list(out.keys())[-1], out, definition) + return out diff --git a/podpac/data.py b/podpac/data.py index 87ac9b4d8..ab4127a7e 100644 --- a/podpac/data.py +++ b/podpac/data.py @@ -13,5 +13,6 @@ from podpac.core.data.dataset_source import Dataset from podpac.core.data.zarr_source import Zarr from podpac.core.data.ogc import WCS +from podpac.core.data.ogr import OGR from podpac.core.data.reprojection import ReprojectedSource from podpac.core.interpolation.interpolation_manager import INTERPOLATION_METHODS diff --git a/podpac/datalib/drought_monitor.py b/podpac/datalib/drought_monitor.py index c17b4bbc0..3c7011efa 100644 --- a/podpac/datalib/drought_monitor.py +++ b/podpac/datalib/drought_monitor.py @@ -17,14 +17,14 @@ class DroughtCategory(Algorithm): d4 = NodeTrait().tag(attr=True) style = Style( clim=[0, 6], - enumeration_colors=[ - [0.45098039, 0.0, 0.0, 1.0], - [0.90196078, 0.0, 0.0, 1.0], - [1.0, 0.66666667, 0.0, 1.0], - [0.98823529, 0.82745098, 0.49803922, 1.0], - [1.0, 1.0, 0.0, 1.0], - [1.0, 1.0, 1.0, 0.0], - ], + enumeration_colors={ + 0: (0.45098039, 0.0, 0.0, 1.0), + 1: (0.90196078, 0.0, 0.0, 1.0), + 2: (1.0, 0.66666667, 0.0, 1.0), + 3: (0.98823529, 0.82745098, 0.49803922, 1.0), + 4: (1.0, 1.0, 0.0, 1.0), + 5: (1.0, 1.0, 1.0, 0.0), + }, ) def algorithm(self, inputs, coordinates): diff --git a/podpac/datalib/test/test_gfs.py b/podpac/datalib/test/test_gfs.py index 4bd3cebf1..9d098195f 100644 --- a/podpac/datalib/test/test_gfs.py +++ b/podpac/datalib/test/test_gfs.py @@ -7,6 +7,7 @@ from podpac.datalib import gfs +@pytest.mark.skip("Broken, GFS data source structure changed. ") @pytest.mark.integration class TestGFS(object): parameter = "SOIM" diff --git a/podpac/version.py b/podpac/version.py index a8a5e431b..7c3815ae2 100644 --- a/podpac/version.py +++ b/podpac/version.py @@ -16,7 +16,7 @@ ## UPDATE VERSION HERE ############## MAJOR = 3 -MINOR = 0 +MINOR = 1 HOTFIX = 0 ############## @@ -64,7 +64,7 @@ def version(): except Exception as e: return version_full - version_full = subprocess.check_output([git, "describe", "--always"], cwd=CWD).strip().decode("ascii") + version_full = subprocess.check_output([git, "describe", "--always", "--tags"], cwd=CWD).strip().decode("ascii") version_full = version_full.replace("-", "+", 1).replace("-", ".") # Make this consistent with PEP440 except Exception as e: