Skip to content

Commit

Permalink
✨ PySTACItemReaderIterDataPipe for reading STAC Items (#46)
Browse files Browse the repository at this point in the history
An iterable-style DataPipe for STAC items! Uses pystac for reading the files or URLs into pystac.Item objects.

* ✨ PySTACItemReaderIterDataPipe for reading STAC Items

An iterable-style DataPipe for STAC items! Uses pystac for reading the files or URLs. Included a doctest that checks the metadata within the pystac.item.Item object. Added a new section in the API docs and an intersphinx mapping.

* 💚 Make pystac an extras dependency in the spatial category

Ensure that zen3geo works even when `pystac` is not installed and add `pystac` to the spatial section of the extras dependencies in pyproject.toml.

* 🚚 Rename functional form as read_to_pystac_item

Decided that since the returned object is a `pystac.Item`, it should probably be `read_to_pystac_item`.

* ✅ Unit test to read JSON and return a pystac.Item

Ensure that a JSON STAC item can be read into a pystac.Item object that contains various spatiotemporal metadata.

* 💚 Skip PySTACItemReader doctests if pystac not installed

Use pytest.importorskip to skip running the doctest when pystac cannot be imported.
  • Loading branch information
weiji14 committed Sep 9, 2022
1 parent 5bc43da commit da0728a
Show file tree
Hide file tree
Showing 7 changed files with 158 additions and 2 deletions.
3 changes: 3 additions & 0 deletions docs/_config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ sphinx:
pyogrio:
- 'https://pyogrio.readthedocs.io/en/latest/'
- null
pystac:
- 'https://pystac.readthedocs.io/en/latest/'
- null
python:
- 'https://docs.python.org/3/'
- null
Expand Down
9 changes: 9 additions & 0 deletions docs/api.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,15 @@
:show-inheritance:
```

### PySTAC

```{eval-rst}
.. automodule:: zen3geo.datapipes.pystac
.. autoclass:: zen3geo.datapipes.PySTACItemReader
.. autoclass:: zen3geo.datapipes.pystac.PySTACItemReaderIterDataPipe
:show-inheritance:
```

### Rioxarray

```{eval-rst}
Expand Down
4 changes: 2 additions & 2 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ docs = [
raster = ["xbatcher"]
spatial = [
"datashader",
"pystac",
"spatialpandas"
]
vector = ["pyogrio"]
Expand Down
1 change: 1 addition & 0 deletions zen3geo/datapipes/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@
GeoPandasRectangleClipperIterDataPipe as GeoPandasRectangleClipper,
)
from zen3geo.datapipes.pyogrio import PyogrioReaderIterDataPipe as PyogrioReader
from zen3geo.datapipes.pystac import PySTACItemReaderIterDataPipe as PySTACItemReader
from zen3geo.datapipes.rioxarray import RioXarrayReaderIterDataPipe as RioXarrayReader
from zen3geo.datapipes.xbatcher import XbatcherSlicerIterDataPipe as XbatcherSlicer
95 changes: 95 additions & 0 deletions zen3geo/datapipes/pystac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
"""
DataPipes for :doc:`pystac <pystac:index>`.
"""
from typing import Any, Dict, Iterator, Optional

try:
import pystac
except ImportError:
pystac = None
from torchdata.datapipes import functional_datapipe
from torchdata.datapipes.iter import IterDataPipe


@functional_datapipe("read_to_pystac_item")
class PySTACItemReaderIterDataPipe(IterDataPipe):
    """
    Takes files from local disk or URLs (as long as they can be read by pystac)
    and yields :py:class:`pystac.Item` objects (functional name:
    ``read_to_pystac_item``).

    Parameters
    ----------
    source_datapipe : IterDataPipe[str]
        A DataPipe that contains filepaths or URL links to STAC items.

    kwargs : Optional
        Extra keyword arguments to pass to :py:meth:`pystac.Item.from_file`.

    Yields
    ------
    stac_item : pystac.Item
        An :py:class:`pystac.Item` object containing the specific STACObject
        implementation class represented in a JSON format.

    Raises
    ------
    ModuleNotFoundError
        If ``pystac`` is not installed. See
        :doc:`install instructions for pystac <pystac:installation>`, (e.g. via
        ``pip install pystac``) before using this class.

    Example
    -------
    >>> import pytest
    >>> pystac = pytest.importorskip("pystac")
    ...
    >>> from torchdata.datapipes.iter import IterableWrapper
    >>> from zen3geo.datapipes import PySTACItemReader
    ...
    >>> # Read in STAC Item using DataPipe
    >>> item_url: str = "https://planetarycomputer.microsoft.com/api/stac/v1/collections/sentinel-2-l2a/items/S2A_MSIL2A_20220115T032101_R118_T48NUG_20220115T170435"
    >>> dp = IterableWrapper(iterable=[item_url])
    >>> dp_pystac = dp.read_to_pystac_item()
    ...
    >>> # Loop or iterate over the DataPipe stream
    >>> it = iter(dp_pystac)
    >>> stac_item = next(it)
    >>> stac_item.bbox
    [103.20205689, 0.81602476, 104.18934086, 1.8096362]
    >>> stac_item.properties  # doctest: +NORMALIZE_WHITESPACE
    {'datetime': '2022-01-15T03:21:01.024000Z',
     'platform': 'Sentinel-2A',
     'proj:epsg': 32648,
     'instruments': ['msi'],
     's2:mgrs_tile': '48NUG',
     'constellation': 'Sentinel 2',
     's2:granule_id': 'S2A_OPER_MSI_L2A_TL_ESRI_20220115T170436_A034292_T48NUG_N03.00',
     'eo:cloud_cover': 17.352597,
     's2:datatake_id': 'GS2A_20220115T032101_034292_N03.00',
     's2:product_uri': 'S2A_MSIL2A_20220115T032101_N0300_R118_T48NUG_20220115T170435.SAFE',
     's2:datastrip_id': 'S2A_OPER_MSI_L2A_DS_ESRI_20220115T170436_S20220115T033502_N03.00',
     's2:product_type': 'S2MSI2A',
     'sat:orbit_state': 'descending',
     ...
    """

    # An annotation on ``**kwargs`` describes the type of each keyword *value*,
    # not of the collected mapping, so ``Any`` is the correct spelling here.
    def __init__(self, source_datapipe: IterDataPipe[str], **kwargs: Any) -> None:
        # ``pystac`` is an optional dependency; the module-level import guard
        # leaves the name bound to None when the package is missing.
        if pystac is None:
            raise ModuleNotFoundError(
                "Package `pystac` is required to be installed to use this datapipe. "
                "Please use `pip install pystac` or "
                "`conda install -c conda-forge pystac` "
                "to install the package"
            )
        self.source_datapipe: IterDataPipe[str] = source_datapipe
        # Stored as-is and forwarded to ``pystac.Item.from_file`` for every item.
        self.kwargs = kwargs

    def __iter__(self) -> Iterator:
        # Items are read one at a time as the stream is consumed, so nothing is
        # loaded until the caller actually iterates.
        for href in self.source_datapipe:
            yield pystac.Item.from_file(href=href, **self.kwargs)

    def __len__(self) -> int:
        # Exactly one item is yielded per input href, so the length is simply
        # that of the upstream datapipe.
        return len(self.source_datapipe)
47 changes: 47 additions & 0 deletions zen3geo/tests/test_datapipes_pystac.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Tests for pystac datapipes.
"""
import pytest
from torchdata.datapipes.iter import IterableWrapper

from zen3geo.datapipes import PySTACItemReader

# pystac is an optional dependency: skip this whole test module (rather than
# erroring out) when it cannot be imported.
pystac = pytest.importorskip("pystac")

# %%
def test_pystac_item_reader():
    """
    Check that PySTACItemReader reads a JSON STAC item from a URL and yields a
    pystac.Item carrying the expected spatiotemporal metadata.
    """
    sample_href: str = "https://github.com/stac-utils/pystac/raw/v1.6.1/tests/data-files/item/sample-item.json"
    source_dp = IterableWrapper(iterable=[sample_href])

    # Build the datapipe through the class constructor ...
    item_dp = PySTACItemReader(source_datapipe=source_dp)
    # ... and through the functional form (recommended); the latter is the
    # instance exercised by the assertions below.
    item_dp = source_dp.read_to_pystac_item()

    assert len(item_dp) == 1
    stac_item = next(iter(item_dp))

    # Spatial and temporal metadata
    assert stac_item.bbox == [-122.59750209, 37.48803556, -122.2880486, 37.613537207]
    assert stac_item.datetime.isoformat() == "2016-05-03T13:22:30.040000+00:00"
    assert stac_item.geometry["type"] == "Polygon"

    # Free-form properties of the item
    expected_properties = {
        "datetime": "2016-05-03T13:22:30.040000Z",
        "title": "A CS3 item",
        "license": "PDDL-1.0",
        "providers": [
            {
                "name": "CoolSat",
                "roles": ["producer", "licensor"],
                "url": "https://cool-sat.com/",
            }
        ],
    }
    assert stac_item.properties == expected_properties

    # Extra (non-core) field stored on one of the item's assets
    analytic_asset = stac_item.assets["analytic"]
    assert (
        analytic_asset.extra_fields["product"]
        == "http://cool-sat.com/catalog/products/analytic.json"
    )

0 comments on commit da0728a

Please sign in to comment.