From 524218921a4fd7d999699eefcd082cdd0fa73e08 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jun 2021 10:58:46 -0500 Subject: [PATCH 01/15] WIP: Added to_xarray for collection-level assets --- intake_stac/catalog.py | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 8ed746f..eda459d 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -2,6 +2,7 @@ import warnings import pystac +import intake_xarray from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry from pkg_resources import get_distribution @@ -36,6 +37,7 @@ 'application/json': 'textfiles', 'application/geo+json': 'geopandas', 'application/geopackage+sqlite3': 'geopandas', + 'application/vnd+zarr': "zarr", } @@ -148,6 +150,45 @@ def _get_metadata(self, **kwargs): del metadata['links'] return metadata + def to_xarray(self, asset, media_type="application/vnd+zarr", storage_options=None, **kwargs): + r""" + Load a collection with Zarr assets as an Xarray Dataset. + + Parameters + ---------- + asset : str, optional + The asset key to use if multiple Zarr assets are provided. + media_type : str, default "application/vnd+zarr" + The Asset media type to look for. + storage_options : dict, optional + Additional storage opens to use in :meth:`xarray.open_zarr`. Merged with + ``self.storage_options`` + **kwargs + Additional keyword options are provided to :class:`intake_xarray.ZarrSource`. + + Returns + ------- + xarray.Dataset + The Zarr dataset located at `asset` loaded into an xarray Dataset. 
+ """ + if media_type: + assets = { + k: v for k, v in self._stac_obj.assets.items() + if v.media_type == media_type + } + else: + assets = self._stac_obj.assets + + if len(assets) == 0: + raise ValueError(f"Catalog {self.id} does not have any assets with media type 'application/vnd+zarr'.") + else: + try: + asset = assets[asset] + except KeyError: + raise KeyError(f'No asset named {asset}. Should be one of {list(assets)}') from None + storage_options = {**(self.storage_options or {}), **(storage_options or {})} + return intake_xarray.ZarrSource(asset.href, storage_options=storage_options, **kwargs).to_dask() + class StacCollection(StacCatalog): """ From e72758177c920c998c9811bd80459aea55ea52d1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 3 Jun 2021 12:48:35 -0500 Subject: [PATCH 02/15] tests --- intake_stac/catalog.py | 2 +- intake_stac/tests/test_catalog.py | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index eda459d..5878e2e 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -180,7 +180,7 @@ def to_xarray(self, asset, media_type="application/vnd+zarr", storage_options=No assets = self._stac_obj.assets if len(assets) == 0: - raise ValueError(f"Catalog {self.id} does not have any assets with media type 'application/vnd+zarr'.") + raise ValueError(f"Catalog {self_stac_obj.id} does not have any assets with media type 'application/vnd+zarr'.") else: try: asset = assets[asset] diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index e601612..1a2b12d 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -281,3 +281,25 @@ def test_collection_of_collection(): result = StacCollection(parent) result._load() + + +def test_collection_level_assets(): + import fsspec + import xarray as xr + import numpy as np + + data = xr.DataArray(np.ones((5, 5, 5)), dims=("time", "y", "x")) + ds = 
xr.Dataset({"data": data}) + store = fsspec.filesystem("memory").get_mapper("data.zarr") + ds.to_zarr(store, mode="w") + + extent = pystac.Extent(spatial=pystac.SpatialExtent([[]]), temporal=pystac.TemporalExtent([[None, None]])) + collection = pystac.Collection( + id="id", description="description", license="license", extent=extent + ) + collection.add_asset("data", pystac.Asset(href="memory://data.zarr", media_type="application/vnd+zarr")) + + # test + intake_collection = StacCollection(collection) + result = intake_collection.to_xarray("data") + xr.testing.assert_equal(result, ds) \ No newline at end of file From 1ba67e4d2dfe04b8e78985450b57425e20d390d2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:43:08 -0500 Subject: [PATCH 03/15] fixups --- intake_stac/catalog.py | 55 +++++++++++++------------------ intake_stac/tests/test_catalog.py | 11 ++++--- 2 files changed, 28 insertions(+), 38 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 5878e2e..bc38eff 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -3,6 +3,7 @@ import pystac import intake_xarray +import intake from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry from pkg_resources import get_distribution @@ -150,16 +151,28 @@ def _get_metadata(self, **kwargs): del metadata['links'] return metadata - def to_xarray(self, asset, media_type="application/vnd+zarr", storage_options=None, **kwargs): + +class StacCollection(StacCatalog): + """ + Maps Intake Catalog to a STAC Collection + https://pystac.readthedocs.io/en/latest/api.html#collection-spec + + Collections have a number of properties that Catalogs do not, most notably + the spatial and temporal extents. This is currently a placeholder for + future Collection-specific attributes and methods. 
+ """ + + name = 'stac_catalog' + _stac_cls = pystac.Collection + + def to_dask(self, asset, storage_options=None, **kwargs): r""" - Load a collection with Zarr assets as an Xarray Dataset. + Load a collection-level asset to a Dask-backed object. Parameters ---------- asset : str, optional The asset key to use if multiple Zarr assets are provided. - media_type : str, default "application/vnd+zarr" - The Asset media type to look for. storage_options : dict, optional Additional storage opens to use in :meth:`xarray.open_zarr`. Merged with ``self.storage_options`` @@ -171,37 +184,13 @@ def to_xarray(self, asset, media_type="application/vnd+zarr", storage_options=No xarray.Dataset The Zarr dataset located at `asset` loaded into an xarray Dataset. """ - if media_type: - assets = { - k: v for k, v in self._stac_obj.assets.items() - if v.media_type == media_type - } - else: - assets = self._stac_obj.assets + try: + asset_ = self._stac_obj.assets[asset] + except KeyError: + raise KeyError(f'No asset named {asset}. Should be one of {list(self._stac_obj.assets)}') from None - if len(assets) == 0: - raise ValueError(f"Catalog {self_stac_obj.id} does not have any assets with media type 'application/vnd+zarr'.") - else: - try: - asset = assets[asset] - except KeyError: - raise KeyError(f'No asset named {asset}. Should be one of {list(assets)}') from None storage_options = {**(self.storage_options or {}), **(storage_options or {})} - return intake_xarray.ZarrSource(asset.href, storage_options=storage_options, **kwargs).to_dask() - - -class StacCollection(StacCatalog): - """ - Maps Intake Catalog to a STAC Collection - https://pystac.readthedocs.io/en/latest/api.html#collection-spec - - Collections have a number of properties that Catalogs do not, most notably - the spatial and temporal extents. This is currently a placeholder for - future Collection-specific attributes and methods. 
- """ - - name = 'stac_catalog' - _stac_cls = pystac.Collection + return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() class StacItemCollection(AbstractStacCatalog): diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index 1a2b12d..a2ac162 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -2,15 +2,20 @@ import os.path import sys +import fsspec import intake +import numpy as np import pystac import pytest +import xarray as xr + from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry from intake_stac import StacCatalog, StacCollection, StacItem, StacItemCollection from intake_stac.catalog import CombinedAssets, StacAsset + here = os.path.dirname(__file__) # sat-stac examples @@ -284,10 +289,6 @@ def test_collection_of_collection(): def test_collection_level_assets(): - import fsspec - import xarray as xr - import numpy as np - data = xr.DataArray(np.ones((5, 5, 5)), dims=("time", "y", "x")) ds = xr.Dataset({"data": data}) store = fsspec.filesystem("memory").get_mapper("data.zarr") @@ -301,5 +302,5 @@ def test_collection_level_assets(): # test intake_collection = StacCollection(collection) - result = intake_collection.to_xarray("data") + result = intake_collection.to_dask("data") xr.testing.assert_equal(result, ds) \ No newline at end of file From 43ab47fbf00387c7d93096fcf547f2bc6a5883c5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:47:30 -0500 Subject: [PATCH 04/15] fixups --- intake_stac/catalog.py | 8 ++++---- intake_stac/tests/test_catalog.py | 24 +++++++++++++----------- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index bc38eff..9c919c9 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -2,8 +2,6 @@ import warnings import pystac -import intake_xarray -import intake from intake.catalog import Catalog from 
intake.catalog.local import LocalCatalogEntry from pkg_resources import get_distribution @@ -38,7 +36,7 @@ 'application/json': 'textfiles', 'application/geo+json': 'geopandas', 'application/geopackage+sqlite3': 'geopandas', - 'application/vnd+zarr': "zarr", + 'application/vnd+zarr': 'zarr', } @@ -187,7 +185,9 @@ def to_dask(self, asset, storage_options=None, **kwargs): try: asset_ = self._stac_obj.assets[asset] except KeyError: - raise KeyError(f'No asset named {asset}. Should be one of {list(self._stac_obj.assets)}') from None + raise KeyError( + f'No asset named {asset}. Should be one of {list(self._stac_obj.assets)}' + ) from None storage_options = {**(self.storage_options or {}), **(storage_options or {})} return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index a2ac162..cca71b0 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -8,14 +8,12 @@ import pystac import pytest import xarray as xr - from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry from intake_stac import StacCatalog, StacCollection, StacItem, StacItemCollection from intake_stac.catalog import CombinedAssets, StacAsset - here = os.path.dirname(__file__) # sat-stac examples @@ -289,18 +287,22 @@ def test_collection_of_collection(): def test_collection_level_assets(): - data = xr.DataArray(np.ones((5, 5, 5)), dims=("time", "y", "x")) - ds = xr.Dataset({"data": data}) - store = fsspec.filesystem("memory").get_mapper("data.zarr") - ds.to_zarr(store, mode="w") + data = xr.DataArray(np.ones((5, 5, 5)), dims=('time', 'y', 'x')) + ds = xr.Dataset({'data': data}) + store = fsspec.filesystem('memory').get_mapper('data.zarr') + ds.to_zarr(store, mode='w') - extent = pystac.Extent(spatial=pystac.SpatialExtent([[]]), temporal=pystac.TemporalExtent([[None, None]])) + extent = pystac.Extent( + 
spatial=pystac.SpatialExtent([[]]), temporal=pystac.TemporalExtent([[None, None]]) + ) collection = pystac.Collection( - id="id", description="description", license="license", extent=extent + id='id', description='description', license='license', extent=extent + ) + collection.add_asset( + 'data', pystac.Asset(href='memory://data.zarr', media_type='application/vnd+zarr') ) - collection.add_asset("data", pystac.Asset(href="memory://data.zarr", media_type="application/vnd+zarr")) # test intake_collection = StacCollection(collection) - result = intake_collection.to_dask("data") - xr.testing.assert_equal(result, ds) \ No newline at end of file + result = intake_collection.to_dask('data') + xr.testing.assert_equal(result, ds) From dc5e89bad6fa90170ad209195dc6fa4e9d9de500 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:51:37 -0500 Subject: [PATCH 05/15] fixup --- intake_stac/catalog.py | 218 +++++++++++++++++++++++------------------ 1 file changed, 121 insertions(+), 97 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 9c919c9..4bd56dd 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -6,37 +6,37 @@ from intake.catalog.local import LocalCatalogEntry from pkg_resources import get_distribution -__version__ = get_distribution('intake_stac').version +__version__ = get_distribution("intake_stac").version # STAC catalog asset 'type' determines intake driver: # https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#media-types -default_type = 'application/rasterio' -default_driver = 'rasterio' +default_type = "application/rasterio" +default_driver = "rasterio" drivers = { - 'application/netcdf': 'netcdf', - 'application/x-netcdf': 'netcdf', - 'application/parquet': 'parquet', - 'application/x-parquet': 'parquet', - 'application/x-hdf': 'netcdf', - 'application/x-hdf5': 'netcdf', - 'application/rasterio': 'rasterio', - 'image/vnd.stac.geotiff': 'rasterio', - 'image/vnd.stac.geotiff; 
cloud-optimized=true': 'rasterio', - 'image/x.geotiff': 'rasterio', - 'image/tiff; application=geotiff': 'rasterio', - 'image/tiff; application=geotiff; profile=cloud-optimized': 'rasterio', # noqa: E501 - 'image/jp2': 'rasterio', - 'image/png': 'xarray_image', - 'image/jpg': 'xarray_image', - 'image/jpeg': 'xarray_image', - 'text/xml': 'textfiles', - 'text/plain': 'textfiles', - 'text/html': 'textfiles', - 'application/json': 'textfiles', - 'application/geo+json': 'geopandas', - 'application/geopackage+sqlite3': 'geopandas', - 'application/vnd+zarr': 'zarr', + "application/netcdf": "netcdf", + "application/x-netcdf": "netcdf", + "application/parquet": "parquet", + "application/x-parquet": "parquet", + "application/x-hdf": "netcdf", + "application/x-hdf5": "netcdf", + "application/rasterio": "rasterio", + "image/vnd.stac.geotiff": "rasterio", + "image/vnd.stac.geotiff; cloud-optimized=true": "rasterio", + "image/x.geotiff": "rasterio", + "image/tiff; application=geotiff": "rasterio", + "image/tiff; application=geotiff; profile=cloud-optimized": "rasterio", # noqa: E501 + "image/jp2": "rasterio", + "image/png": "xarray_image", + "image/jpg": "xarray_image", + "image/jpeg": "xarray_image", + "text/xml": "textfiles", + "text/plain": "textfiles", + "text/html": "textfiles", + "application/json": "textfiles", + "application/geo+json": "geopandas", + "application/geopackage+sqlite3": "geopandas", + "application/vnd+zarr": "zarr", } @@ -61,11 +61,14 @@ def __init__(self, stac_obj, **kwargs): elif isinstance(stac_obj, str): self._stac_obj = self._stac_cls.from_file(stac_obj) else: - raise ValueError('Expected %s instance, got: %s' % (self._stac_cls, type(stac_obj))) + raise ValueError( + "Expected %s instance, got: %s" + % (self._stac_cls, type(stac_obj)) + ) - metadata = self._get_metadata(**kwargs.pop('metadata', {})) + metadata = self._get_metadata(**kwargs.pop("metadata", {})) try: - name = kwargs.pop('name', self._stac_obj.id) + name = kwargs.pop("name", 
self._stac_obj.id) except AttributeError: # Not currently tested. # ItemCollection does not require an id @@ -109,7 +112,7 @@ class StacCatalog(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html?#catalog-spec """ - name = 'stac_catalog' + name = "stac_catalog" _stac_cls = pystac.Catalog def _load(self): @@ -128,16 +131,16 @@ def _load(self): description=subcatalog.description, driver=driver, # recursive catalog=self, - args={'stac_obj': subcatalog.get_self_href()}, + args={"stac_obj": subcatalog.get_self_href()}, ) for item in self._stac_obj.get_items(): self._entries[item.id] = LocalCatalogEntry( name=item.id, - description='', + description="", driver=StacItem, catalog=self, - args={'stac_obj': item}, + args={"stac_obj": item}, ) def _get_metadata(self, **kwargs): @@ -146,7 +149,7 @@ def _get_metadata(self, **kwargs): """ # NOTE: why not links? metadata = self._stac_obj.to_dict() - del metadata['links'] + del metadata["links"] return metadata @@ -160,7 +163,7 @@ class StacCollection(StacCatalog): future Collection-specific attributes and methods. """ - name = 'stac_catalog' + name = "stac_catalog" _stac_cls = pystac.Collection def to_dask(self, asset, storage_options=None, **kwargs): @@ -170,12 +173,12 @@ def to_dask(self, asset, storage_options=None, **kwargs): Parameters ---------- asset : str, optional - The asset key to use if multiple Zarr assets are provided. + The asset key to load. storage_options : dict, optional - Additional storage opens to use in :meth:`xarray.open_zarr`. Merged with - ``self.storage_options`` + Additional arguments for the backend fsspec filesystem. Merged with ``self.storage_options``. **kwargs - Additional keyword options are provided to :class:`intake_xarray.ZarrSource`. + Additional keyword options are provided to the loader, for example ``consolidated=True`` + to pass to :meth:`xarray.open_zarr`. 
Returns ------- @@ -186,11 +189,16 @@ def to_dask(self, asset, storage_options=None, **kwargs): asset_ = self._stac_obj.assets[asset] except KeyError: raise KeyError( - f'No asset named {asset}. Should be one of {list(self._stac_obj.assets)}' + f"No asset named {asset}. Should be one of {list(self._stac_obj.assets)}" ) from None - storage_options = {**(self.storage_options or {}), **(storage_options or {})} - return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() + storage_options = { + **(self.storage_options or {}), + **(storage_options or {}), + } + return StacAsset(asset, asset_)( + storage_options=storage_options, **kwargs + ).to_dask() class StacItemCollection(AbstractStacCatalog): @@ -202,22 +210,24 @@ class StacItemCollection(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html?#single-file-stac-extension """ - name = 'stac_itemcollection' + name = "stac_itemcollection" _stac_cls = pystac.Catalog def _load(self): """ Load the STAC Item Collection. """ - if not self._stac_obj.ext.implements('single-file-stac'): - raise ValueError("StacItemCollection requires 'single-file-stac' extension") - for feature in self._stac_obj.ext['single-file-stac'].features: + if not self._stac_obj.ext.implements("single-file-stac"): + raise ValueError( + "StacItemCollection requires 'single-file-stac' extension" + ) + for feature in self._stac_obj.ext["single-file-stac"].features: self._entries[feature.id] = LocalCatalogEntry( name=feature.id, - description='', + description="", driver=StacItem, catalog=self, - args={'stac_obj': feature}, + args={"stac_obj": feature}, ) def to_geopandas(self, crs=None): @@ -238,12 +248,12 @@ def to_geopandas(self, crs=None): import geopandas as gpd except ImportError: raise ImportError( - 'Using to_geopandas requires the `geopandas` package.' - 'You can install it via Pip or Conda.' + "Using to_geopandas requires the `geopandas` package." + "You can install it via Pip or Conda." 
) if crs is None: - crs = 'epsg:4326' + crs = "epsg:4326" gf = gpd.GeoDataFrame.from_features(self._stac_obj.to_dict(), crs=crs) return gf @@ -254,7 +264,7 @@ class StacItem(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html#item-spec """ - name = 'stac_item' + name = "stac_item" _stac_cls = pystac.Item def _load(self): @@ -266,7 +276,7 @@ def _load(self): def _get_metadata(self, **kwargs): metadata = self._stac_obj.properties.copy() - for attr in ['bbox', 'geometry', 'datetime', 'date']: + for attr in ["bbox", "geometry", "datetime", "date"]: metadata[attr] = getattr(self._stac_obj, attr, None) metadata.update(kwargs) return metadata @@ -280,15 +290,15 @@ def _get_band_info(self): # NOTE: ensure we test these scenarios # FileNotFoundError: [Errno 2] No such file or directory: '/catalog.json' collection = self._stac_obj.get_collection() - if 'item-assets' in collection.stac_extensions: - for val in collection.ext['item_assets']: - if 'eo:bands' in val: - band_info.append(val.get('eo:bands')[0]) + if "item-assets" in collection.stac_extensions: + for val in collection.ext["item_assets"]: + if "eo:bands" in val: + band_info.append(val.get("eo:bands")[0]) else: - band_info = collection.summaries['eo:bands'] + band_info = collection.summaries["eo:bands"] except Exception: - for band in self._stac_obj.ext['eo'].get_bands(): + for band in self._stac_obj.ext["eo"].get_bands(): band_info.append(band.to_dict()) finally: if not band_info: @@ -297,7 +307,7 @@ def _get_band_info(self): ) return band_info - def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): + def stack_bands(self, bands, path_as_pattern=None, concat_dim="band"): """ Stack the listed bands over the ``band`` dimension. 
@@ -328,8 +338,10 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): stack = item.stack_bands(['B4','B5'], path_as_pattern='{band}.TIF') da = stack(chunks=dict(band=1, x=2048, y=2048)).to_dask() """ - if 'eo' not in self._stac_obj.stac_extensions: - raise ValueError('STAC Item must implement "eo" extension to use this method') + if "eo" not in self._stac_obj.stac_extensions: + raise ValueError( + 'STAC Item must implement "eo" extension to use this method' + ) band_info = self._get_band_info() configDict = {} @@ -341,20 +353,29 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): for band in bands: # band can be band id, name or common_name if band in assets: - info = next((b for b in band_info if b.get('id', b.get('name')) == band), None,) + info = next( + ( + b + for b in band_info + if b.get("id", b.get("name")) == band + ), + None, + ) else: - info = next((b for b in band_info if b.get('common_name') == band), None) + info = next( + (b for b in band_info if b.get("common_name") == band), None + ) if info is not None: - band = info.get('id', info.get('name')) + band = info.get("id", info.get("name")) if band not in assets or info is None: valid_band_names = [] for b in band_info: - valid_band_names.append(b.get('id', b.get('name'))) - valid_band_names.append(b.get('common_name')) + valid_band_names.append(b.get("id", b.get("name"))) + valid_band_names.append(b.get("common_name")) raise ValueError( - f'{band} not found in list of eo:bands in collection.' - f'Valid values: {sorted(list(set(valid_band_names)))}' + f"{band} not found in list of eo:bands in collection." 
+ f"Valid values: {sorted(list(set(valid_band_names)))}" ) asset = assets.get(band) metadatas[band] = asset.to_dict() @@ -365,15 +386,18 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): unique_types = set(types) if len(unique_types) != 1: raise ValueError( - f'Stacking failed: bands must have type, multiple found: {unique_types}' + f"Stacking failed: bands must have type, multiple found: {unique_types}" ) - configDict['name'] = '_'.join(bands) - configDict['description'] = ', '.join(titles) - configDict['args'] = dict( - chunks={}, concat_dim=concat_dim, path_as_pattern=path_as_pattern, urlpath=hrefs + configDict["name"] = "_".join(bands) + configDict["description"] = ", ".join(titles) + configDict["args"] = dict( + chunks={}, + concat_dim=concat_dim, + path_as_pattern=path_as_pattern, + urlpath=hrefs, ) - configDict['metadata'] = metadatas + configDict["metadata"] = metadatas return CombinedAssets(configDict) @@ -384,7 +408,7 @@ class StacAsset(LocalCatalogEntry): https://pystac.readthedocs.io/en/latest/api.html#asset """ - name = 'stac_asset' + name = "stac_asset" _stac_cls = pystac.item.Asset def __init__(self, key, asset): @@ -410,7 +434,7 @@ def _get_metadata(self, asset): metadata = asset.to_dict() default_plot = self._get_plot(asset) if default_plot: - metadata['plots'] = default_plot + metadata["plots"] = default_plot return metadata @@ -422,13 +446,13 @@ def _get_plot(self, asset): default_plot = None type = asset.media_type if type: - if type in ['image/jpeg', 'image/jpg', 'image/png']: + if type in ["image/jpeg", "image/jpg", "image/png"]: default_plot = dict( thumbnail=dict( - kind='rgb', - x='x', - y='y', - bands='channel', + kind="rgb", + x="x", + y="y", + bands="channel", data_aspect=1, flip_yaxis=True, xaxis=False, @@ -436,17 +460,17 @@ def _get_plot(self, asset): ) ) - elif 'tiff' in type: + elif "tiff" in type: default_plot = dict( geotiff=dict( - kind='image', - x='x', - y='y', + kind="image", + x="x", + y="y", 
frame_width=500, data_aspect=1, rasterize=True, dynamic=True, - cmap='viridis', + cmap="viridis", ) ) @@ -458,11 +482,11 @@ def _get_driver(self, asset): """ entry_type = asset.media_type - if entry_type in ['', 'null', None]: + if entry_type in ["", "null", None]: suffix = os.path.splitext(asset.media_type)[-1] - if suffix in ['.nc', '.h5', '.hdf']: - asset.media_type = 'application/netcdf' + if suffix in [".nc", ".h5", ".hdf"]: + asset.media_type = "application/netcdf" warnings.warn( f'STAC Asset "type" missing, assigning {entry_type} based on href suffix {suffix}:\n{asset.media_type}' # noqa: E501 ) @@ -482,8 +506,8 @@ def _get_args(self, asset, driver): """ Optional keyword arguments to pass to intake driver """ - args = {'urlpath': asset.href} - if driver in ['netcdf', 'rasterio', 'xarray_image']: + args = {"urlpath": asset.href} + if driver in ["netcdf", "rasterio", "xarray_image"]: # NOTE: force using dask? args.update(chunks={}) @@ -500,10 +524,10 @@ def __init__(self, configDict): configDict = intake Entry dictionary from stack_bands() method """ super().__init__( - name=configDict['name'], - description=configDict['description'], - driver='rasterio', # stack_bands only relevant to rasterio driver? + name=configDict["name"], + description=configDict["description"], + driver="rasterio", # stack_bands only relevant to rasterio driver? direct_access=True, - args=configDict['args'], - metadata=configDict['metadata'], + args=configDict["args"], + metadata=configDict["metadata"], ) From ced714924cc1e4e69fa7de3c5a0747306eb0f826 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:52:22 -0500 Subject: [PATCH 06/15] Revert "fixup" This reverts commit dc5e89bad6fa90170ad209195dc6fa4e9d9de500. 
--- intake_stac/catalog.py | 218 ++++++++++++++++++----------------------- 1 file changed, 97 insertions(+), 121 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 4bd56dd..9c919c9 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -6,37 +6,37 @@ from intake.catalog.local import LocalCatalogEntry from pkg_resources import get_distribution -__version__ = get_distribution("intake_stac").version +__version__ = get_distribution('intake_stac').version # STAC catalog asset 'type' determines intake driver: # https://github.com/radiantearth/stac-spec/blob/master/item-spec/item-spec.md#media-types -default_type = "application/rasterio" -default_driver = "rasterio" +default_type = 'application/rasterio' +default_driver = 'rasterio' drivers = { - "application/netcdf": "netcdf", - "application/x-netcdf": "netcdf", - "application/parquet": "parquet", - "application/x-parquet": "parquet", - "application/x-hdf": "netcdf", - "application/x-hdf5": "netcdf", - "application/rasterio": "rasterio", - "image/vnd.stac.geotiff": "rasterio", - "image/vnd.stac.geotiff; cloud-optimized=true": "rasterio", - "image/x.geotiff": "rasterio", - "image/tiff; application=geotiff": "rasterio", - "image/tiff; application=geotiff; profile=cloud-optimized": "rasterio", # noqa: E501 - "image/jp2": "rasterio", - "image/png": "xarray_image", - "image/jpg": "xarray_image", - "image/jpeg": "xarray_image", - "text/xml": "textfiles", - "text/plain": "textfiles", - "text/html": "textfiles", - "application/json": "textfiles", - "application/geo+json": "geopandas", - "application/geopackage+sqlite3": "geopandas", - "application/vnd+zarr": "zarr", + 'application/netcdf': 'netcdf', + 'application/x-netcdf': 'netcdf', + 'application/parquet': 'parquet', + 'application/x-parquet': 'parquet', + 'application/x-hdf': 'netcdf', + 'application/x-hdf5': 'netcdf', + 'application/rasterio': 'rasterio', + 'image/vnd.stac.geotiff': 'rasterio', + 'image/vnd.stac.geotiff; 
cloud-optimized=true': 'rasterio', + 'image/x.geotiff': 'rasterio', + 'image/tiff; application=geotiff': 'rasterio', + 'image/tiff; application=geotiff; profile=cloud-optimized': 'rasterio', # noqa: E501 + 'image/jp2': 'rasterio', + 'image/png': 'xarray_image', + 'image/jpg': 'xarray_image', + 'image/jpeg': 'xarray_image', + 'text/xml': 'textfiles', + 'text/plain': 'textfiles', + 'text/html': 'textfiles', + 'application/json': 'textfiles', + 'application/geo+json': 'geopandas', + 'application/geopackage+sqlite3': 'geopandas', + 'application/vnd+zarr': 'zarr', } @@ -61,14 +61,11 @@ def __init__(self, stac_obj, **kwargs): elif isinstance(stac_obj, str): self._stac_obj = self._stac_cls.from_file(stac_obj) else: - raise ValueError( - "Expected %s instance, got: %s" - % (self._stac_cls, type(stac_obj)) - ) + raise ValueError('Expected %s instance, got: %s' % (self._stac_cls, type(stac_obj))) - metadata = self._get_metadata(**kwargs.pop("metadata", {})) + metadata = self._get_metadata(**kwargs.pop('metadata', {})) try: - name = kwargs.pop("name", self._stac_obj.id) + name = kwargs.pop('name', self._stac_obj.id) except AttributeError: # Not currently tested. # ItemCollection does not require an id @@ -112,7 +109,7 @@ class StacCatalog(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html?#catalog-spec """ - name = "stac_catalog" + name = 'stac_catalog' _stac_cls = pystac.Catalog def _load(self): @@ -131,16 +128,16 @@ def _load(self): description=subcatalog.description, driver=driver, # recursive catalog=self, - args={"stac_obj": subcatalog.get_self_href()}, + args={'stac_obj': subcatalog.get_self_href()}, ) for item in self._stac_obj.get_items(): self._entries[item.id] = LocalCatalogEntry( name=item.id, - description="", + description='', driver=StacItem, catalog=self, - args={"stac_obj": item}, + args={'stac_obj': item}, ) def _get_metadata(self, **kwargs): @@ -149,7 +146,7 @@ def _get_metadata(self, **kwargs): """ # NOTE: why not links? 
metadata = self._stac_obj.to_dict() - del metadata["links"] + del metadata['links'] return metadata @@ -163,7 +160,7 @@ class StacCollection(StacCatalog): future Collection-specific attributes and methods. """ - name = "stac_catalog" + name = 'stac_catalog' _stac_cls = pystac.Collection def to_dask(self, asset, storage_options=None, **kwargs): @@ -173,12 +170,12 @@ def to_dask(self, asset, storage_options=None, **kwargs): Parameters ---------- asset : str, optional - The asset key to load. + The asset key to use if multiple Zarr assets are provided. storage_options : dict, optional - Additional arguments for the backend fsspec filesystem. Merged with ``self.storage_options``. + Additional storage opens to use in :meth:`xarray.open_zarr`. Merged with + ``self.storage_options`` **kwargs - Additional keyword options are provided to the loader, for example ``consolidated=True`` - to pass to :meth:`xarray.open_zarr`. + Additional keyword options are provided to :class:`intake_xarray.ZarrSource`. Returns ------- @@ -189,16 +186,11 @@ def to_dask(self, asset, storage_options=None, **kwargs): asset_ = self._stac_obj.assets[asset] except KeyError: raise KeyError( - f"No asset named {asset}. Should be one of {list(self._stac_obj.assets)}" + f'No asset named {asset}. 
Should be one of {list(self._stac_obj.assets)}' ) from None - storage_options = { - **(self.storage_options or {}), - **(storage_options or {}), - } - return StacAsset(asset, asset_)( - storage_options=storage_options, **kwargs - ).to_dask() + storage_options = {**(self.storage_options or {}), **(storage_options or {})} + return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() class StacItemCollection(AbstractStacCatalog): @@ -210,24 +202,22 @@ class StacItemCollection(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html?#single-file-stac-extension """ - name = "stac_itemcollection" + name = 'stac_itemcollection' _stac_cls = pystac.Catalog def _load(self): """ Load the STAC Item Collection. """ - if not self._stac_obj.ext.implements("single-file-stac"): - raise ValueError( - "StacItemCollection requires 'single-file-stac' extension" - ) - for feature in self._stac_obj.ext["single-file-stac"].features: + if not self._stac_obj.ext.implements('single-file-stac'): + raise ValueError("StacItemCollection requires 'single-file-stac' extension") + for feature in self._stac_obj.ext['single-file-stac'].features: self._entries[feature.id] = LocalCatalogEntry( name=feature.id, - description="", + description='', driver=StacItem, catalog=self, - args={"stac_obj": feature}, + args={'stac_obj': feature}, ) def to_geopandas(self, crs=None): @@ -248,12 +238,12 @@ def to_geopandas(self, crs=None): import geopandas as gpd except ImportError: raise ImportError( - "Using to_geopandas requires the `geopandas` package." - "You can install it via Pip or Conda." + 'Using to_geopandas requires the `geopandas` package.' + 'You can install it via Pip or Conda.' 
) if crs is None: - crs = "epsg:4326" + crs = 'epsg:4326' gf = gpd.GeoDataFrame.from_features(self._stac_obj.to_dict(), crs=crs) return gf @@ -264,7 +254,7 @@ class StacItem(AbstractStacCatalog): https://pystac.readthedocs.io/en/latest/api.html#item-spec """ - name = "stac_item" + name = 'stac_item' _stac_cls = pystac.Item def _load(self): @@ -276,7 +266,7 @@ def _load(self): def _get_metadata(self, **kwargs): metadata = self._stac_obj.properties.copy() - for attr in ["bbox", "geometry", "datetime", "date"]: + for attr in ['bbox', 'geometry', 'datetime', 'date']: metadata[attr] = getattr(self._stac_obj, attr, None) metadata.update(kwargs) return metadata @@ -290,15 +280,15 @@ def _get_band_info(self): # NOTE: ensure we test these scenarios # FileNotFoundError: [Errno 2] No such file or directory: '/catalog.json' collection = self._stac_obj.get_collection() - if "item-assets" in collection.stac_extensions: - for val in collection.ext["item_assets"]: - if "eo:bands" in val: - band_info.append(val.get("eo:bands")[0]) + if 'item-assets' in collection.stac_extensions: + for val in collection.ext['item_assets']: + if 'eo:bands' in val: + band_info.append(val.get('eo:bands')[0]) else: - band_info = collection.summaries["eo:bands"] + band_info = collection.summaries['eo:bands'] except Exception: - for band in self._stac_obj.ext["eo"].get_bands(): + for band in self._stac_obj.ext['eo'].get_bands(): band_info.append(band.to_dict()) finally: if not band_info: @@ -307,7 +297,7 @@ def _get_band_info(self): ) return band_info - def stack_bands(self, bands, path_as_pattern=None, concat_dim="band"): + def stack_bands(self, bands, path_as_pattern=None, concat_dim='band'): """ Stack the listed bands over the ``band`` dimension. 
@@ -338,10 +328,8 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim="band"): stack = item.stack_bands(['B4','B5'], path_as_pattern='{band}.TIF') da = stack(chunks=dict(band=1, x=2048, y=2048)).to_dask() """ - if "eo" not in self._stac_obj.stac_extensions: - raise ValueError( - 'STAC Item must implement "eo" extension to use this method' - ) + if 'eo' not in self._stac_obj.stac_extensions: + raise ValueError('STAC Item must implement "eo" extension to use this method') band_info = self._get_band_info() configDict = {} @@ -353,29 +341,20 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim="band"): for band in bands: # band can be band id, name or common_name if band in assets: - info = next( - ( - b - for b in band_info - if b.get("id", b.get("name")) == band - ), - None, - ) + info = next((b for b in band_info if b.get('id', b.get('name')) == band), None,) else: - info = next( - (b for b in band_info if b.get("common_name") == band), None - ) + info = next((b for b in band_info if b.get('common_name') == band), None) if info is not None: - band = info.get("id", info.get("name")) + band = info.get('id', info.get('name')) if band not in assets or info is None: valid_band_names = [] for b in band_info: - valid_band_names.append(b.get("id", b.get("name"))) - valid_band_names.append(b.get("common_name")) + valid_band_names.append(b.get('id', b.get('name'))) + valid_band_names.append(b.get('common_name')) raise ValueError( - f"{band} not found in list of eo:bands in collection." - f"Valid values: {sorted(list(set(valid_band_names)))}" + f'{band} not found in list of eo:bands in collection.' 
+ f'Valid values: {sorted(list(set(valid_band_names)))}' ) asset = assets.get(band) metadatas[band] = asset.to_dict() @@ -386,18 +365,15 @@ def stack_bands(self, bands, path_as_pattern=None, concat_dim="band"): unique_types = set(types) if len(unique_types) != 1: raise ValueError( - f"Stacking failed: bands must have type, multiple found: {unique_types}" + f'Stacking failed: bands must have type, multiple found: {unique_types}' ) - configDict["name"] = "_".join(bands) - configDict["description"] = ", ".join(titles) - configDict["args"] = dict( - chunks={}, - concat_dim=concat_dim, - path_as_pattern=path_as_pattern, - urlpath=hrefs, + configDict['name'] = '_'.join(bands) + configDict['description'] = ', '.join(titles) + configDict['args'] = dict( + chunks={}, concat_dim=concat_dim, path_as_pattern=path_as_pattern, urlpath=hrefs ) - configDict["metadata"] = metadatas + configDict['metadata'] = metadatas return CombinedAssets(configDict) @@ -408,7 +384,7 @@ class StacAsset(LocalCatalogEntry): https://pystac.readthedocs.io/en/latest/api.html#asset """ - name = "stac_asset" + name = 'stac_asset' _stac_cls = pystac.item.Asset def __init__(self, key, asset): @@ -434,7 +410,7 @@ def _get_metadata(self, asset): metadata = asset.to_dict() default_plot = self._get_plot(asset) if default_plot: - metadata["plots"] = default_plot + metadata['plots'] = default_plot return metadata @@ -446,13 +422,13 @@ def _get_plot(self, asset): default_plot = None type = asset.media_type if type: - if type in ["image/jpeg", "image/jpg", "image/png"]: + if type in ['image/jpeg', 'image/jpg', 'image/png']: default_plot = dict( thumbnail=dict( - kind="rgb", - x="x", - y="y", - bands="channel", + kind='rgb', + x='x', + y='y', + bands='channel', data_aspect=1, flip_yaxis=True, xaxis=False, @@ -460,17 +436,17 @@ def _get_plot(self, asset): ) ) - elif "tiff" in type: + elif 'tiff' in type: default_plot = dict( geotiff=dict( - kind="image", - x="x", - y="y", + kind='image', + x='x', + y='y', 
frame_width=500, data_aspect=1, rasterize=True, dynamic=True, - cmap="viridis", + cmap='viridis', ) ) @@ -482,11 +458,11 @@ def _get_driver(self, asset): """ entry_type = asset.media_type - if entry_type in ["", "null", None]: + if entry_type in ['', 'null', None]: suffix = os.path.splitext(asset.media_type)[-1] - if suffix in [".nc", ".h5", ".hdf"]: - asset.media_type = "application/netcdf" + if suffix in ['.nc', '.h5', '.hdf']: + asset.media_type = 'application/netcdf' warnings.warn( f'STAC Asset "type" missing, assigning {entry_type} based on href suffix {suffix}:\n{asset.media_type}' # noqa: E501 ) @@ -506,8 +482,8 @@ def _get_args(self, asset, driver): """ Optional keyword arguments to pass to intake driver """ - args = {"urlpath": asset.href} - if driver in ["netcdf", "rasterio", "xarray_image"]: + args = {'urlpath': asset.href} + if driver in ['netcdf', 'rasterio', 'xarray_image']: # NOTE: force using dask? args.update(chunks={}) @@ -524,10 +500,10 @@ def __init__(self, configDict): configDict = intake Entry dictionary from stack_bands() method """ super().__init__( - name=configDict["name"], - description=configDict["description"], - driver="rasterio", # stack_bands only relevant to rasterio driver? + name=configDict['name'], + description=configDict['description'], + driver='rasterio', # stack_bands only relevant to rasterio driver? 
direct_access=True, - args=configDict["args"], - metadata=configDict["metadata"], + args=configDict['args'], + metadata=configDict['metadata'], ) From c4984b87658deeb3b243df5caab4b619d87e56cd Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:53:39 -0500 Subject: [PATCH 07/15] fixup --- intake_stac/catalog.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 9c919c9..39af5cf 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -172,10 +172,10 @@ def to_dask(self, asset, storage_options=None, **kwargs): asset : str, optional The asset key to use if multiple Zarr assets are provided. storage_options : dict, optional - Additional storage opens to use in :meth:`xarray.open_zarr`. Merged with - ``self.storage_options`` + Additional arguments for the backend fsspec filesystem. Merged with ``self.storage_options``. **kwargs - Additional keyword options are provided to :class:`intake_xarray.ZarrSource`. + Additional keyword options are provided to the loader, for example ``consolidated=True`` + to pass to :meth:`xarray.open_zarr`. Returns ------- From 14fff3f9ff95bc0d403838170eb973497d763607 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 14 Jun 2021 09:57:06 -0500 Subject: [PATCH 08/15] fixup docs --- intake_stac/catalog.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 39af5cf..143fc10 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -177,10 +177,14 @@ def to_dask(self, asset, storage_options=None, **kwargs): Additional keyword options are provided to the loader, for example ``consolidated=True`` to pass to :meth:`xarray.open_zarr`. + Notes + ----- + The Media Type of the asset will be used to determine how to load the data. + Returns ------- - xarray.Dataset - The Zarr dataset located at `asset` loaded into an xarray Dataset. 
+ xarray.Dataset, pandas.DataFrame + The dataset described by the asset loaded into a dask-backed object. """ try: asset_ = self._stac_obj.assets[asset] From ed8efc0493dbd131ec93f3da1eaf818a2f842d63 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 25 Jun 2021 07:03:38 -0500 Subject: [PATCH 09/15] fixups --- intake_stac/catalog.py | 6 +++--- setup.cfg | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 2482609..dccf4c6 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -173,7 +173,7 @@ def to_dask(self, asset, storage_options=None, **kwargs): asset : str, optional The asset key to use if multiple Zarr assets are provided. storage_options : dict, optional - Additional arguments for the backend fsspec filesystem. Merged with ``self.storage_options``. + Additional arguments for the backend fsspec filesystem. **kwargs Additional keyword options are provided to the loader, for example ``consolidated=True`` to pass to :meth:`xarray.open_zarr`. @@ -184,7 +184,7 @@ def to_dask(self, asset, storage_options=None, **kwargs): Returns ------- - xarray.Dataset, pandas.DataFrame + xarray.Dataset, pandas.DataFrame The dataset described by the asset loaded into a dask-backed object. """ try: @@ -194,7 +194,7 @@ def to_dask(self, asset, storage_options=None, **kwargs): f'No asset named {asset}. 
Should be one of {list(self._stac_obj.assets)}' ) from None - storage_options = {**(self.storage_options or {}), **(storage_options or {})} + storage_options = storage_options or {} return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() diff --git a/setup.cfg b/setup.cfg index 9d44cb8..fd8bcb9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -11,7 +11,7 @@ select = B,C,E,F,W,T4,B9 [isort] known_first_party=intake_stac -known_third_party=intake,pkg_resources,pystac,pytest,setuptools +known_third_party=fsspec,intake,numpy,pkg_resources,pystac,pytest,setuptools,xarray multi_line_output=3 include_trailing_comma=True force_grid_wrap=0 From 1d3af7fab4079e8e389e8309cd5a6e32dda60251 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Oct 2021 11:03:47 -0500 Subject: [PATCH 10/15] Updates --- intake_stac/catalog.py | 20 +++++++++++++++++++- intake_stac/tests/test_catalog.py | 17 +++++++++++++++++ 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index c5bc83e..56192a1 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -166,7 +166,14 @@ class StacCollection(StacCatalog): name = 'stac_catalog' _stac_cls = pystac.Collection - def to_dask(self, asset, storage_options=None, **kwargs): + def to_dask( + self, + asset, + storage_options=None, + merge_asset_storage_options=True, + merge_asset_open_kwargs=True, + **kwargs, + ): r""" Load a collection-level asset to a Dask-backed object. @@ -176,6 +183,9 @@ def to_dask(self, asset, storage_options=None, **kwargs): The asset key to use if multiple Zarr assets are provided. storage_options : dict, optional Additional arguments for the backend fsspec filesystem. + merge_asset_storage_option : bool, default True + Whether to merge the storage options provided by the asset under the + ``xarray:storage_options`` key with `storage_options`. 
**kwargs Additional keyword options are provided to the loader, for example ``consolidated=True`` to pass to :meth:`xarray.open_zarr`. @@ -197,6 +207,14 @@ def to_dask(self, asset, storage_options=None, **kwargs): ) from None storage_options = storage_options or {} + if merge_asset_storage_options: + asset_storage_options = asset_.extra_fields.get('xarray:storage_options', {}) + storage_options.update(asset_storage_options) + + if merge_asset_open_kwargs: + asset_open_kwargs = asset_.extra_fields.get('xarray:open_kwargs', {}) + kwargs.update(asset_open_kwargs) + return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index 7e4916a..afe1ddd 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -345,3 +345,20 @@ def test_collection_level_assets(): intake_collection = StacCollection(collection) result = intake_collection.to_dask('data') xr.testing.assert_equal(result, ds) + + +def test_collection_level_asset_pc_https(): + collection = intake.open_stac_collection( + 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi' + ) + collection.to_dask('zarr-https') + + +@pytest.mark.importorskip('adlfs') +def test_collection_level_asset_pc_adlfs(): + collection = intake.open_stac_collection( + 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi' + ) + collection.to_dask( + 'zarr-abfs', consolidated=True + ) # planetary computer asset is missing consolidated From e7c47a4f98ed46ea48d4b0705f30bd82b55bf83b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Oct 2021 13:21:02 -0500 Subject: [PATCH 11/15] Handle items --- intake_stac/catalog.py | 16 ++++++++++++++++ intake_stac/tests/test_catalog.py | 10 ---------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 56192a1..a40d8a2 100644 --- 
a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -4,6 +4,7 @@ import pystac from intake.catalog import Catalog from intake.catalog.local import LocalCatalogEntry +from intake.source import DataSource from pkg_resources import get_distribution from pystac.extensions.eo import EOExtension @@ -282,6 +283,20 @@ class StacItem(AbstractStacCatalog): name = 'stac_item' _stac_cls = pystac.Item + def __getitem__(self, key): + result = super().__getitem__(key) + # TODO: handle non-string assets? + asset = self._entries[key] + storage_options = asset._stac_obj.extra_fields.get('xarray:storage_options', {}) + open_kwargs = asset._stac_obj.extra_fields.get('xarray:open_kwargs', {}) + + if isinstance(result, DataSource): + kwargs = result._captured_init_kwargs + kwargs = {**kwargs, **dict(storage_options=storage_options), **open_kwargs} + result = result(*result._captured_init_args, **kwargs) + + return result + def _load(self): """ Load the STAC Item. @@ -409,6 +424,7 @@ def __init__(self, key, asset): Construct an Intake catalog 'Source' from a STAC Item Asset. 
asset = pystac.item.Asset """ + self._stac_obj = asset driver = self._get_driver(asset) super().__init__( diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index afe1ddd..3a57156 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -352,13 +352,3 @@ def test_collection_level_asset_pc_https(): 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi' ) collection.to_dask('zarr-https') - - -@pytest.mark.importorskip('adlfs') -def test_collection_level_asset_pc_adlfs(): - collection = intake.open_stac_collection( - 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi' - ) - collection.to_dask( - 'zarr-abfs', consolidated=True - ) # planetary computer asset is missing consolidated From 37dbecf81b27b78f3e4a7a6cf4eaa5edd4dc284a Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Oct 2021 14:06:52 -0500 Subject: [PATCH 12/15] Updates * Tests for item-level, collection-level * Implemented `get_asset` --- intake_stac/catalog.py | 8 +- .../1.0.0/collection/zarr-collection.json | 499 ++++++++++++++++++ .../tests/data/1.0.0/item/zarr-item.json | 478 +++++++++++++++++ intake_stac/tests/test_catalog.py | 19 +- 4 files changed, 994 insertions(+), 10 deletions(-) create mode 100644 intake_stac/tests/data/1.0.0/collection/zarr-collection.json create mode 100644 intake_stac/tests/data/1.0.0/item/zarr-item.json diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index a40d8a2..df14485 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -167,7 +167,7 @@ class StacCollection(StacCatalog): name = 'stac_catalog' _stac_cls = pystac.Collection - def to_dask( + def get_asset( self, asset, storage_options=None, @@ -176,7 +176,7 @@ def to_dask( **kwargs, ): r""" - Load a collection-level asset to a Dask-backed object. + Get a datasource for a collection-level asset. 
Parameters ---------- @@ -197,7 +197,7 @@ def to_dask( Returns ------- - xarray.Dataset, pandas.DataFrame + DataSource The dataset described by the asset loaded into a dask-backed object. """ try: @@ -216,7 +216,7 @@ def to_dask( asset_open_kwargs = asset_.extra_fields.get('xarray:open_kwargs', {}) kwargs.update(asset_open_kwargs) - return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs).to_dask() + return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs) class StacItemCollection(AbstractStacCatalog): diff --git a/intake_stac/tests/data/1.0.0/collection/zarr-collection.json b/intake_stac/tests/data/1.0.0/collection/zarr-collection.json new file mode 100644 index 0000000..1467422 --- /dev/null +++ b/intake_stac/tests/data/1.0.0/collection/zarr-collection.json @@ -0,0 +1,499 @@ +{ + "type": "Collection", + "id": "daymet-daily-hi", + "stac_version": "1.0.0", + "description": "{{ collection.description }}", + "links": [ + { + "rel": "license", + "href": "https://science.nasa.gov/earth-science/earth-science-data/data-information-policy" + } + ], + "stac_extensions": [ + "https://stac-extensions.github.io/datacube/v2.0.0/schema.json" + ], + "cube:dimensions": { + "time": { + "type": "temporal", + "description": "24-hour day based on local time", + "extent": ["1980-01-01T12:00:00Z", "2020-12-30T12:00:00Z"] + }, + "x": { + "type": "spatial", + "axis": "x", + "description": "x coordinate of projection", + "extent": [-5802250.0, -5519250.0], + "step": 1000.0, + "reference_system": { + "$schema": "https://proj.org/schemas/v0.2/projjson.schema.json", + "type": "ProjectedCRS", + "name": "undefined", + "base_crs": { + "name": "undefined", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "undefined", + "ellipsoid": { + "name": "undefined", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + "axis": [ + { + "name": "Longitude", + "abbreviation": "lon", + 
"direction": "east", + "unit": "degree" + }, + { + "name": "Latitude", + "abbreviation": "lat", + "direction": "north", + "unit": "degree" + } + ] + } + }, + "conversion": { + "name": "unknown", + "method": { + "name": "Lambert Conic Conformal (2SP)", + "id": { + "authority": "EPSG", + "code": 9802 + } + }, + "parameters": [ + { + "name": "Latitude of 1st standard parallel", + "value": 25, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8823 + } + }, + { + "name": "Latitude of 2nd standard parallel", + "value": 60, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8824 + } + }, + { + "name": "Latitude of false origin", + "value": 42.5, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8821 + } + }, + { + "name": "Longitude of false origin", + "value": -100, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8822 + } + }, + { + "name": "Easting at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8826 + } + }, + { + "name": "Northing at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8827 + } + } + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + { + "name": "Easting", + "abbreviation": "E", + "direction": "east", + "unit": "metre" + }, + { + "name": "Northing", + "abbreviation": "N", + "direction": "north", + "unit": "metre" + } + ] + } + } + }, + "y": { + "type": "spatial", + "axis": "y", + "description": "y coordinate of projection", + "extent": [-622000.0, -39000.0], + "step": -1000.0, + "reference_system": { + "$schema": "https://proj.org/schemas/v0.2/projjson.schema.json", + "type": "ProjectedCRS", + "name": "undefined", + "base_crs": { + "name": "undefined", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "undefined", + "ellipsoid": { + "name": "undefined", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + 
"axis": [ + { + "name": "Longitude", + "abbreviation": "lon", + "direction": "east", + "unit": "degree" + }, + { + "name": "Latitude", + "abbreviation": "lat", + "direction": "north", + "unit": "degree" + } + ] + } + }, + "conversion": { + "name": "unknown", + "method": { + "name": "Lambert Conic Conformal (2SP)", + "id": { + "authority": "EPSG", + "code": 9802 + } + }, + "parameters": [ + { + "name": "Latitude of 1st standard parallel", + "value": 25, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8823 + } + }, + { + "name": "Latitude of 2nd standard parallel", + "value": 60, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8824 + } + }, + { + "name": "Latitude of false origin", + "value": 42.5, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8821 + } + }, + { + "name": "Longitude of false origin", + "value": -100, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8822 + } + }, + { + "name": "Easting at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8826 + } + }, + { + "name": "Northing at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8827 + } + } + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + { + "name": "Easting", + "abbreviation": "E", + "direction": "east", + "unit": "metre" + }, + { + "name": "Northing", + "abbreviation": "N", + "direction": "north", + "unit": "metre" + } + ] + } + } + }, + "nv": { + "type": "count", + "description": "Size of the 'time_bnds' variable.", + "values": [0, 1] + } + }, + "cube:variables": { + "dayl": { + "type": "data", + "description": "daylength", + "dimensions": ["time", "y", "x"], + "unit": "s", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daylength", + "units": "s" + } + }, + "lambert_conformal_conic": { + "type": "data", + "dimensions": 
[], + "shape": [], + "attrs": { + "false_easting": 0.0, + "false_northing": 0.0, + "grid_mapping_name": "lambert_conformal_conic", + "inverse_flattening": 298.257223563, + "latitude_of_projection_origin": 42.5, + "longitude_of_central_meridian": -100.0, + "semi_major_axis": 6378137.0, + "standard_parallel": [25.0, 60.0] + } + }, + "lat": { + "type": "auxiliary", + "description": "latitude coordinate", + "dimensions": ["y", "x"], + "unit": "degrees_north", + "shape": [584, 284], + "chunks": [584, 284], + "attrs": { + "long_name": "latitude coordinate", + "standard_name": "latitude", + "units": "degrees_north" + } + }, + "lon": { + "type": "auxiliary", + "description": "longitude coordinate", + "dimensions": ["y", "x"], + "unit": "degrees_east", + "shape": [584, 284], + "chunks": [584, 284], + "attrs": { + "long_name": "longitude coordinate", + "standard_name": "longitude", + "units": "degrees_east" + } + }, + "prcp": { + "type": "data", + "description": "daily total precipitation", + "dimensions": ["time", "y", "x"], + "unit": "mm/day", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: sum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily total precipitation", + "units": "mm/day" + } + }, + "srad": { + "type": "data", + "description": "daylight average incident shortwave radiation", + "dimensions": ["time", "y", "x"], + "unit": "W/m2", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daylight average incident shortwave radiation", + "units": "W/m2" + } + }, + "swe": { + "type": "data", + "description": "snow water equivalent", + "dimensions": ["time", "y", "x"], + "unit": "kg/m2", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "snow water 
equivalent", + "units": "kg/m2" + } + }, + "time_bnds": { + "type": "data", + "dimensions": ["time", "nv"], + "shape": [14965, 2], + "chunks": [365, 2], + "attrs": {} + }, + "tmax": { + "type": "data", + "description": "daily maximum temperature", + "dimensions": ["time", "y", "x"], + "unit": "degrees C", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: maximum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily maximum temperature", + "units": "degrees C" + } + }, + "tmin": { + "type": "data", + "description": "daily minimum temperature", + "dimensions": ["time", "y", "x"], + "unit": "degrees C", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: minimum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily minimum temperature", + "units": "degrees C" + } + }, + "vp": { + "type": "data", + "description": "daily average vapor pressure", + "dimensions": ["time", "y", "x"], + "unit": "Pa", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily average vapor pressure", + "units": "Pa" + } + }, + "yearday": { + "type": "data", + "description": "day of year (DOY) starting with day 1 on January 1st", + "dimensions": ["time"], + "shape": [14965], + "chunks": [365], + "attrs": { + "long_name": "day of year (DOY) starting with day 1 on January 1st" + } + } + }, + "title": "Daymet Daily Hawaii", + "keywords": [ + "Daymet", + "Hawaii", + "Temperature", + "Precipitation", + "Vapor Pressure", + "Weather" + ], + "providers": [ + { + "name": "Microsoft", + "roles": ["host", "processor"], + "url": "https://planetarycomputer.microsoft.com" + }, + { + "name": "ORNL DAAC", + "roles": ["producer"], + "url": "https://doi.org/10.3334/ORNLDAAC/1840" + } + ], + "assets": { + "zarr-https": { + "href": 
"https://daymeteuwest.blob.core.windows.net/daymet-zarr/daily/hi.zarr", + "type": "application/vnd+zarr", + "title": "Daily Hawaii Daymet HTTPS Zarr root", + "description": "HTTPS URI of the daily Hawaii Daymet Zarr Group on Azure Blob Storage.", + "xarray:open_kwargs": { + "consolidated": true + }, + "roles": ["data", "zarr", "https"] + }, + "zarr-abfs": { + "href": "abfs://daymet-zarr/daily/hi.zarr", + "type": "application/vnd+zarr", + "title": "Daily Hawaii Daymet Azure Blob File System Zarr root", + "description": "Azure Blob File System of the daily Hawaii Daymet Zarr Group on Azure Blob Storage for use with adlfs.", + "xarray:storage_options": { + "account_name": "daymeteuwest" + }, + "xarray:open_kwargs": { + "consolidated": true + }, + "roles": ["data", "zarr", "abfs"] + }, + "thumbnail": { + "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-daily-hi.png", + "type": "image/png", + "title": "Daymet daily Hawaii map thumbnail", + "roles": ["thumbnail"] + } + }, + "msft:short_description": "Daily surface weather data on a 1-km grid for Hawaii", + "msft:storage_account": "daymeteuwest", + "msft:container": "daymet-zarr", + "msft:group_id": "daymet", + "msft:group_keys": ["daily", "hawaii"], + "extent": { + "spatial": { + "bbox": [[-160.3056, 17.9539, -154.772, 23.5186]] + }, + "temporal": { + "interval": [["1980-01-01T12:00:00Z", "2020-12-30T12:00:00Z"]] + } + }, + "license": "proprietary" +} diff --git a/intake_stac/tests/data/1.0.0/item/zarr-item.json b/intake_stac/tests/data/1.0.0/item/zarr-item.json new file mode 100644 index 0000000..6a494c5 --- /dev/null +++ b/intake_stac/tests/data/1.0.0/item/zarr-item.json @@ -0,0 +1,478 @@ +{ + "type": "Feature", + "stac_version": "1.0.0", + "id": "daymet-daily-hi", + "properties": { + "cube:dimensions": { + "time": { + "type": "temporal", + "description": "24-hour day based on local time", + "extent": ["1980-01-01T12:00:00Z", "2020-12-30T12:00:00Z"] + }, + "x": { + "type": 
"spatial", + "axis": "x", + "description": "x coordinate of projection", + "extent": [-5802250.0, -5519250.0], + "step": 1000.0, + "reference_system": { + "$schema": "https://proj.org/schemas/v0.2/projjson.schema.json", + "type": "ProjectedCRS", + "name": "undefined", + "base_crs": { + "name": "undefined", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "undefined", + "ellipsoid": { + "name": "undefined", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + "axis": [ + { + "name": "Longitude", + "abbreviation": "lon", + "direction": "east", + "unit": "degree" + }, + { + "name": "Latitude", + "abbreviation": "lat", + "direction": "north", + "unit": "degree" + } + ] + } + }, + "conversion": { + "name": "unknown", + "method": { + "name": "Lambert Conic Conformal (2SP)", + "id": { + "authority": "EPSG", + "code": 9802 + } + }, + "parameters": [ + { + "name": "Latitude of 1st standard parallel", + "value": 25, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8823 + } + }, + { + "name": "Latitude of 2nd standard parallel", + "value": 60, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8824 + } + }, + { + "name": "Latitude of false origin", + "value": 42.5, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8821 + } + }, + { + "name": "Longitude of false origin", + "value": -100, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8822 + } + }, + { + "name": "Easting at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8826 + } + }, + { + "name": "Northing at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8827 + } + } + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + { + "name": "Easting", + "abbreviation": "E", + "direction": "east", + "unit": "metre" + }, + { + "name": "Northing", + "abbreviation": "N", + "direction": 
"north", + "unit": "metre" + } + ] + } + } + }, + "y": { + "type": "spatial", + "axis": "y", + "description": "y coordinate of projection", + "extent": [-622000.0, -39000.0], + "step": -1000.0, + "reference_system": { + "$schema": "https://proj.org/schemas/v0.2/projjson.schema.json", + "type": "ProjectedCRS", + "name": "undefined", + "base_crs": { + "name": "undefined", + "datum": { + "type": "GeodeticReferenceFrame", + "name": "undefined", + "ellipsoid": { + "name": "undefined", + "semi_major_axis": 6378137, + "inverse_flattening": 298.257223563 + } + }, + "coordinate_system": { + "subtype": "ellipsoidal", + "axis": [ + { + "name": "Longitude", + "abbreviation": "lon", + "direction": "east", + "unit": "degree" + }, + { + "name": "Latitude", + "abbreviation": "lat", + "direction": "north", + "unit": "degree" + } + ] + } + }, + "conversion": { + "name": "unknown", + "method": { + "name": "Lambert Conic Conformal (2SP)", + "id": { + "authority": "EPSG", + "code": 9802 + } + }, + "parameters": [ + { + "name": "Latitude of 1st standard parallel", + "value": 25, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8823 + } + }, + { + "name": "Latitude of 2nd standard parallel", + "value": 60, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8824 + } + }, + { + "name": "Latitude of false origin", + "value": 42.5, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8821 + } + }, + { + "name": "Longitude of false origin", + "value": -100, + "unit": "degree", + "id": { + "authority": "EPSG", + "code": 8822 + } + }, + { + "name": "Easting at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8826 + } + }, + { + "name": "Northing at false origin", + "value": 0, + "unit": "metre", + "id": { + "authority": "EPSG", + "code": 8827 + } + } + ] + }, + "coordinate_system": { + "subtype": "Cartesian", + "axis": [ + { + "name": "Easting", + "abbreviation": "E", + "direction": "east", + "unit": "metre" + }, + { 
+ "name": "Northing", + "abbreviation": "N", + "direction": "north", + "unit": "metre" + } + ] + } + } + }, + "nv": { + "type": "count", + "description": "Size of the 'time_bnds' variable.", + "values": [0, 1] + } + }, + "cube:variables": { + "dayl": { + "type": "data", + "description": "daylength", + "dimensions": ["time", "y", "x"], + "unit": "s", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daylength", + "units": "s" + } + }, + "lambert_conformal_conic": { + "type": "data", + "dimensions": [], + "shape": [], + "attrs": { + "false_easting": 0.0, + "false_northing": 0.0, + "grid_mapping_name": "lambert_conformal_conic", + "inverse_flattening": 298.257223563, + "latitude_of_projection_origin": 42.5, + "longitude_of_central_meridian": -100.0, + "semi_major_axis": 6378137.0, + "standard_parallel": [25.0, 60.0] + } + }, + "lat": { + "type": "auxiliary", + "description": "latitude coordinate", + "dimensions": ["y", "x"], + "unit": "degrees_north", + "shape": [584, 284], + "chunks": [584, 284], + "attrs": { + "long_name": "latitude coordinate", + "standard_name": "latitude", + "units": "degrees_north" + } + }, + "lon": { + "type": "auxiliary", + "description": "longitude coordinate", + "dimensions": ["y", "x"], + "unit": "degrees_east", + "shape": [584, 284], + "chunks": [584, 284], + "attrs": { + "long_name": "longitude coordinate", + "standard_name": "longitude", + "units": "degrees_east" + } + }, + "prcp": { + "type": "data", + "description": "daily total precipitation", + "dimensions": ["time", "y", "x"], + "unit": "mm/day", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: sum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily total precipitation", + "units": "mm/day" + } + }, + "srad": { + "type": "data", + "description": "daylight average incident shortwave radiation", + 
"dimensions": ["time", "y", "x"], + "unit": "W/m2", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daylight average incident shortwave radiation", + "units": "W/m2" + } + }, + "swe": { + "type": "data", + "description": "snow water equivalent", + "dimensions": ["time", "y", "x"], + "unit": "kg/m2", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "snow water equivalent", + "units": "kg/m2" + } + }, + "time_bnds": { + "type": "data", + "dimensions": ["time", "nv"], + "shape": [14965, 2], + "chunks": [365, 2], + "attrs": {} + }, + "tmax": { + "type": "data", + "description": "daily maximum temperature", + "dimensions": ["time", "y", "x"], + "unit": "degrees C", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: maximum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily maximum temperature", + "units": "degrees C" + } + }, + "tmin": { + "type": "data", + "description": "daily minimum temperature", + "dimensions": ["time", "y", "x"], + "unit": "degrees C", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: minimum", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily minimum temperature", + "units": "degrees C" + } + }, + "vp": { + "type": "data", + "description": "daily average vapor pressure", + "dimensions": ["time", "y", "x"], + "unit": "Pa", + "shape": [14965, 584, 284], + "chunks": [365, 584, 284], + "attrs": { + "cell_methods": "area: mean time: mean", + "grid_mapping": "lambert_conformal_conic", + "long_name": "daily average vapor pressure", + "units": "Pa" + } + }, + "yearday": { + "type": "data", + "description": "day of year (DOY) starting with day 1 on January 
1st", + "dimensions": ["time"], + "shape": [14965], + "chunks": [365], + "attrs": { + "long_name": "day of year (DOY) starting with day 1 on January 1st" + } + } + }, + "start_datetime": "1980-01-01T12:00:00Z", + "end_datetime": "2020-12-30T12:00:00Z", + "datetime": null + }, + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [-154.7780670634169, 17.960033949329812], + [-154.7780670634169, 23.51232608231902], + [-160.2988400944475, 23.51232608231902], + [-160.2988400944475, 17.960033949329812], + [-154.7780670634169, 17.960033949329812] + ] + ] + }, + "links": [], + "assets": { + "zarr-https": { + "href": "https://daymeteuwest.blob.core.windows.net/daymet-zarr/daily/hi.zarr", + "type": "application/vnd+zarr", + "title": "Daily Hawaii Daymet HTTPS Zarr root", + "description": "HTTPS URI of the daily Hawaii Daymet Zarr Group on Azure Blob Storage.", + "xarray:open_kwargs": { + "consolidated": true + }, + "roles": ["data", "zarr", "https"] + }, + "zarr-abfs": { + "href": "abfs://daymet-zarr/daily/hi.zarr", + "type": "application/vnd+zarr", + "title": "Daily Hawaii Daymet Azure Blob File System Zarr root", + "description": "Azure Blob File System of the daily Hawaii Daymet Zarr Group on Azure Blob Storage for use with adlfs.", + "xarray:storage_options": { + "account_name": "daymeteuwest" + }, + "xarray:open_kwargs": { + "consolidated": true + }, + "roles": ["data", "zarr", "abfs"] + }, + "thumbnail": { + "href": "https://ai4edatasetspublicassets.blob.core.windows.net/assets/pc_thumbnails/daymet-daily-hi.png", + "type": "image/png", + "title": "Daymet daily Hawaii map thumbnail" + } + }, + "bbox": [ + -160.2988400944475, 17.960033949329812, -154.7780670634169, + 23.51232608231902 + ], + "stac_extensions": [ + "https://stac-extensions.github.io/datacube/v2.0.0/schema.json" + ] +} diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index 38fca59..1ea6cc4 100644 --- a/intake_stac/tests/test_catalog.py +++ 
b/intake_stac/tests/test_catalog.py @@ -343,12 +343,19 @@ def test_collection_level_assets(): # test intake_collection = StacCollection(collection) - result = intake_collection.to_dask('data') + result = intake_collection.get_asset('data').to_dask() xr.testing.assert_equal(result, ds) -def test_collection_level_asset_pc_https(): - collection = intake.open_stac_collection( - 'https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi' - ) - collection.to_dask('zarr-https') +def test_xarray_assets_item(): + item = intake.open_stac_item(str(here / 'data/1.0.0/item/zarr-item.json')) + asset = item['zarr-abfs'] + assert asset.kwargs == {'consolidated': True} + assert asset.storage_options == {'account_name': 'daymeteuwest'} + + +def test_xarray_assets_collection(): + item = intake.open_stac_collection(str(here / 'data/1.0.0/collection/zarr-collection.json')) + asset = item.get_asset('zarr-abfs') + assert asset.kwargs == {'consolidated': True} + assert asset.storage_options == {'account_name': 'daymeteuwest'} From f1dc6ffabf2d73db2e798b3007987903caf113aa Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Oct 2021 14:18:21 -0500 Subject: [PATCH 13/15] Added small doc example --- docs/source/tutorial.rst | 44 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index b2acc0a..5bcc971 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -135,3 +135,47 @@ Intake-stac can turn this object into an Intake catalog: catalog = intake.open_stac_item_collection('single-file-stac.json') list(catalog) + +Using xarray-assets +------------------- + +Intake-stac uses the `xarray-assets`_ STAC extension to automatically use the appropriate keywords to load a STAC asset into a data container. + +.. code-block:: python + + >>> collection = intake.open_stac_collection( + ...
"https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi" + ... ) + + >>> source = collection.get_asset("zarr-https") + >>> source.to_dask() + <xarray.Dataset> + Dimensions: (nv: 2, time: 41, x: 284, y: 584) + Coordinates: + lat (y, x) float32 dask.array + lon (y, x) float32 dask.array + * time (time) datetime64[ns] 1980-07-01T12:00:00 ... 20... + * x (x) float32 -5.802e+06 -5.801e+06 ... -5.519e+06 + * y (y) float32 -3.9e+04 -4e+04 ... -6.21e+05 -6.22e+05 + Dimensions without coordinates: nv + Data variables: + lambert_conformal_conic int16 ... + prcp (time, y, x) float32 dask.array + swe (time, y, x) float32 dask.array + time_bnds (time, nv) datetime64[ns] dask.array + tmax (time, y, x) float32 dask.array + tmin (time, y, x) float32 dask.array + vp (time, y, x) float32 dask.array + Attributes: + Conventions: CF-1.6 + Version_data: Daymet Data Version 4.0 + Version_software: Daymet Software Version 4.0 + citation: Please see http://daymet.ornl.gov/ for current Daymet ... + references: Please see http://daymet.ornl.gov/ for current informa... + source: Daymet Software Version 4.0 + start_year: 1980 + +In that example, the STAC catalog indicates that the Zarr dataset should be opened with ``consolidated=True``, +so intake-stac automatically forwards that keyword argument through to xarray. + +..
_xarray-assets: https://github.com/stac-extensions/xarray-assets From 2fdccf3df18749ae98141bc8e14486acb87d17dc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 19 Oct 2021 16:38:00 -0500 Subject: [PATCH 14/15] Update for comments --- docs/source/tutorial.rst | 9 ++++++--- intake_stac/catalog.py | 17 ++++++++++------- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/docs/source/tutorial.rst b/docs/source/tutorial.rst index 5bcc971..1d47d8c 100644 --- a/docs/source/tutorial.rst +++ b/docs/source/tutorial.rst @@ -141,6 +141,12 @@ Using xarray-assets Intake-stac uses the `xarray-assets`_ STAC extension to automatically use the appropriate keywords to load a STAC asset into a data container. +Intake-stac will automatically use the keywords from the `xarray-assets`_ STAC extension, if present, when loading data into a container. +For example, the STAC collection at <https://planetarycomputer.microsoft.com/api/stac/v1/collections/daymet-annual-hi> defines an +asset ``zarr-https`` with the metadata ``"xarray:open_kwargs": {"consolidated": true}`` to indicate that this dataset should be +opened with the ``consolidated=True`` keyword argument. This will be used automatically by ``.to_dask()``. + + .. code-block:: python >>> collection = intake.open_stac_collection( @@ -175,7 +181,4 @@ Intake-stac uses the `xarray-assets`_ STAC extension to automatically use the ap source: Daymet Software Version 4.0 start_year: 1980 -In that example, the STAC catalog indicates that the Zarr dataset should be opened with ``consolidated=True``, -so intake-stac automatically forwards that keyword argument through to xarray. - ..
_xarray-assets: https://github.com/stac-extensions/xarray-assets diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index df14485..078735e 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -169,7 +169,7 @@ class StacCollection(StacCatalog): def get_asset( self, - asset, + key, storage_options=None, merge_asset_storage_options=True, merge_asset_open_kwargs=True, @@ -180,13 +180,16 @@ def get_asset( Parameters ---------- - asset : str, optional + key : str, optional The asset key to use if multiple Zarr assets are provided. storage_options : dict, optional Additional arguments for the backend fsspec filesystem. merge_asset_storage_option : bool, default True Whether to merge the storage options provided by the asset under the ``xarray:storage_options`` key with `storage_options`. + merge_asset_open_kwargs : bool, default True + Whether to merge the keywords provided by the asset under the + ``xarray:open_kwargs`` key with ``**kwargs``. **kwargs Additional keyword options are provided to the loader, for example ``consolidated=True`` to pass to :meth:`xarray.open_zarr`. @@ -201,22 +204,22 @@ def get_asset( The dataset described by the asset loaded into a dask-backed object. """ try: - asset_ = self._stac_obj.assets[asset] + asset = self._stac_obj.assets[key] except KeyError: raise KeyError( - f'No asset named {asset}. Should be one of {list(self._stac_obj.assets)}' + f'No asset named {key}. 
Should be one of {list(self._stac_obj.assets)}' ) from None storage_options = storage_options or {} if merge_asset_storage_options: - asset_storage_options = asset_.extra_fields.get('xarray:storage_options', {}) + asset_storage_options = asset.extra_fields.get('xarray:storage_options', {}) storage_options.update(asset_storage_options) if merge_asset_open_kwargs: - asset_open_kwargs = asset_.extra_fields.get('xarray:open_kwargs', {}) + asset_open_kwargs = asset.extra_fields.get('xarray:open_kwargs', {}) kwargs.update(asset_open_kwargs) - return StacAsset(asset, asset_)(storage_options=storage_options, **kwargs) + return StacAsset(asset, asset)(storage_options=storage_options, **kwargs) class StacItemCollection(AbstractStacCatalog): From c33d04761142f2c76c483aa64b8f518b42d27476 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 20 Oct 2021 12:21:03 -0500 Subject: [PATCH 15/15] asset key --- intake_stac/catalog.py | 2 +- intake_stac/tests/test_catalog.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/intake_stac/catalog.py b/intake_stac/catalog.py index 078735e..dd0eaff 100644 --- a/intake_stac/catalog.py +++ b/intake_stac/catalog.py @@ -219,7 +219,7 @@ def get_asset( asset_open_kwargs = asset.extra_fields.get('xarray:open_kwargs', {}) kwargs.update(asset_open_kwargs) - return StacAsset(asset, asset)(storage_options=storage_options, **kwargs) + return StacAsset(key, asset)(storage_options=storage_options, **kwargs) class StacItemCollection(AbstractStacCatalog): diff --git a/intake_stac/tests/test_catalog.py b/intake_stac/tests/test_catalog.py index 1ea6cc4..32f7fbb 100644 --- a/intake_stac/tests/test_catalog.py +++ b/intake_stac/tests/test_catalog.py @@ -343,8 +343,8 @@ def test_collection_level_assets(): # test intake_collection = StacCollection(collection) - result = intake_collection.get_asset('data').to_dask() - xr.testing.assert_equal(result, ds) + result = intake_collection.get_asset('data') + 
xr.testing.assert_equal(result.to_dask(), ds) def test_xarray_assets_item():