Skip to content

Commit

Permalink
Merge pull request #73 from digitalearthpacific/retry-search
Browse files Browse the repository at this point in the history
Fix to missing NoOutputError, add fix_winding function, re-add client to searcher
  • Loading branch information
alexgleith authored Nov 10, 2024
2 parents 0d51912 + 0b90c5a commit 028f23a
Show file tree
Hide file tree
Showing 8 changed files with 83 additions and 29 deletions.
1 change: 0 additions & 1 deletion dep_tools/grids.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def grid(
Otherwise, a GeoDataFrame containing only the portions of each tile
that intersect the given GeoDataFrame is returned.
"""

if intersect_with is not None:
if return_type != "GridSpec":
full_grid = _geoseries(resolution, crs)
Expand Down
23 changes: 14 additions & 9 deletions dep_tools/landsat_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from shapely.geometry import box
from xarray import DataArray, Dataset

from dep_tools.utils import bbox_across_180
from dep_tools.utils import bbox_across_180, fix_winding


def cloud_mask(
Expand Down Expand Up @@ -66,13 +66,20 @@ def mask_clouds(
return xr.where(~mask)


def _pathrows():
pathrows = GeoDataFrame(
read_file(
"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/WRS2_descending_0.zip"
)
)
pathrows["geometry"] = pathrows.geometry.apply(fix_winding)
return pathrows


def pathrows_in_area(area: GeoDataFrame, pathrows: GeoDataFrame | None = None):
if pathrows is None:
pathrows = GeoDataFrame(
read_file(
"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/WRS2_descending_0.zip"
)
)
pathrows = _pathrows()

bbox = bbox_across_180(area)
if isinstance(bbox, tuple):
return pathrows[
Expand All @@ -99,9 +106,7 @@ def items_in_pathrows(


def pathrow_with_greatest_area(shapes: GeoDataFrame) -> Tuple[str, str]:
pathrows = read_file(
"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/WRS2_descending_0.zip"
)
pathrows = _pathrows()
intersection = shapes.overlay(pathrows, how="intersection")
row_with_greatest_area = intersection.iloc[[intersection.geometry.area.idxmax()]]
return (row_with_greatest_area.PATH.item(), row_with_greatest_area.ROW.item())
43 changes: 27 additions & 16 deletions dep_tools/searchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@ class PystacSearcher(Searcher):
:func:`dep_tools.utils.search_across_180`.
Args:
client: A search client.
catalog: The URL of a stac catalog. if client is specified, this is
ignored.
client: A search client. Either this or catalog must be specified.
raise_empty_collection_error: Whether an EmptyCollectionError exception
should be returned if no stac items are found.
**kwargs: Additional arguments passed to client.search(). For example,
Expand All @@ -44,11 +46,20 @@ class PystacSearcher(Searcher):

def __init__(
self,
catalog: str,
catalog: str | None = None,
client: Client | None = None,
raise_empty_collection_error: bool = True,
**kwargs,
):
self._client = Client.open(catalog)
if client and catalog:
warnings.warn(
"Arguments for both 'client' and 'catalog' passed to PystacSearcher, ignoring catalog"
)

if not (client or catalog):
raise ValueError("Must specify either client or catalog")

self._client = client if client else Client.open(catalog)
self._raise_errors = raise_empty_collection_error
self._kwargs = kwargs

Expand All @@ -65,12 +76,12 @@ def search(self, area: GeoDataFrame | GeoBox) -> ItemCollection:
region=area, client=self._client, **self._kwargs
)

if len(item_collection) == 0 and self._raise_errors:
raise EmptyCollectionError()

fix_bad_epsgs(item_collection)
item_collection = remove_bad_items(item_collection)

if len(item_collection) == 0 and self._raise_errors:
raise EmptyCollectionError()

return item_collection


Expand All @@ -80,7 +91,9 @@ class LandsatPystacSearcher(PystacSearcher):
query, just use :class:PystacSearcher.
Args:
client: A search client.
catalog: The URL of a stac catalog. if client is specified, this is
ignored.
client: A search client. Either this or catalog must be specified.
raise_empty_collection_error: Whether an EmptyCollectionError exception should
be returned if no stac items are found.
search_intersecting_pathrows: Whether to use landsat pathrows which
Expand All @@ -99,7 +112,8 @@ class LandsatPystacSearcher(PystacSearcher):

def __init__(
self,
catalog: str = "https://planetarycomputer.microsoft.com/api/stac/v1/",
catalog: str | None = None,
client: Client | None = None,
collections: list[str] | None = ["landsat-c2-l2"],
raise_empty_collection_error: bool = True,
search_intersecting_pathrows: bool = False,
Expand All @@ -110,6 +124,7 @@ def __init__(
):
super().__init__(
catalog=catalog,
client=client,
raise_empty_collection_error=raise_empty_collection_error,
**kwargs,
)
Expand Down Expand Up @@ -140,16 +155,9 @@ def __init__(

self._kwargs["query"] = query

if self._search_intersecting_pathrows:
self._landsat_pathrows = read_file(
"https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/s3fs-public/atoms/files/WRS2_descending_0.zip"
)

def search(self, area: GeoDataFrame):
search_area = (
pathrows_in_area(area, self._landsat_pathrows)
if self._search_intersecting_pathrows
else area
pathrows_in_area(area) if self._search_intersecting_pathrows else area
)
try:
items = super().search(search_area)
Expand All @@ -164,4 +172,7 @@ def search(self, area: GeoDataFrame):
if self._search_intersecting_pathrows:
items = items_in_pathrows(items, search_area)

if len(items) == 0 and self._raise_errors:
raise EmptyCollectionError()

return items
25 changes: 24 additions & 1 deletion dep_tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,13 @@
from pystac import ItemCollection
import pystac_client
from retry import retry
from shapely.geometry import LineString, MultiLineString, GeometryCollection
from shapely.geometry import (
LineString,
MultiLineString,
GeometryCollection,
MultiPolygon,
)
from shapely.geometry.polygon import orient
from xarray import DataArray, Dataset

# Set the timeout to five minutes, which is an extremely long time
Expand Down Expand Up @@ -108,6 +114,23 @@ def bbox_across_180(region: GeoDataFrame | GeoBox) -> BBOX | tuple[BBOX, BBOX]:
return BBOX(bbox)


from shapely import Geometry


def fix_winding(geom: Geometry) -> Geometry:
"""Fixes the orientation of the exterior coordinates of Polygon and
MultiPolygon geometry, which should run counterclockwise.
"""
# This is non-essential but resolves a barrage of warnings from
# the antimeridian package
if geom.geom_type == "Polygon" and not geom.exterior.is_ccw:
return orient(geom)
elif geom.geom_type == "MultiPolygon":
return MultiPolygon([fix_winding(p) for p in geom.geoms])
else:
return geom


def _fix_geometry(geometry):
if isinstance(geometry, GeometryCollection):
return GeometryCollection(
Expand Down
8 changes: 8 additions & 0 deletions tests/test_fix_winding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
from shapely.geometry import Polygon, MultiPolygon
from dep_tools.utils import fix_winding


def test_fix_winding():
bad_poly = Polygon(shell=[[0, 0], [0, 1], [1, 1], [1, 0], [0, 0]])
assert not bad_poly.exterior.is_ccw
assert fix_winding(bad_poly).exterior.is_ccw
3 changes: 3 additions & 0 deletions tests/test_landsat_mspc_stac_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def test_hobart_all(hobart_area):
exclude_platforms=None,
only_tier_one=False,
fall_back_to_tier_two=False,
catalog="https://planetarycomputer.microsoft.com/api/stac/v1/",
)

items = list(searcher.search(hobart_area))
Expand All @@ -42,6 +43,7 @@ def test_hobart_tier_one_only(hobart_area):
exclude_platforms=None,
only_tier_one=True,
fall_back_to_tier_two=False,
catalog="https://planetarycomputer.microsoft.com/api/stac/v1/",
)

items = list(searcher.search(hobart_area))
Expand All @@ -55,6 +57,7 @@ def test_hobart_tier_exclude_7(hobart_area):
exclude_platforms=["landsat-7"],
only_tier_one=False,
fall_back_to_tier_two=False,
catalog="https://planetarycomputer.microsoft.com/api/stac/v1/",
)

items = list(searcher.search(hobart_area))
Expand Down
4 changes: 3 additions & 1 deletion tests/test_landsat_mspc_stac_search_fixes.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def test_pathrows_in_area(near_antimeridian_area):

def test_search_for_stac_items_with_bad_geoms(near_antimeridian_area):
searcher = LandsatPystacSearcher(
datetime=DATETIME, search_intersecting_pathrows=True
datetime=DATETIME,
search_intersecting_pathrows=True,
catalog="https://planetarycomputer.microsoft.com/api/stac/v1/",
)

items = searcher.search(near_antimeridian_area)
Expand Down
5 changes: 4 additions & 1 deletion tests/test_searchers.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,10 @@ def test_PystacSearcher(area, mspc_catalog):


def test_LandsatPystacSearcher_exclude_platforms(area):
s = LandsatPystacSearcher(exclude_platforms=["landsat-7"])
s = LandsatPystacSearcher(
exclude_platforms=["landsat-7"],
catalog="https://planetarycomputer.microsoft.com/api/stac/v1/",
)
s.search(area)


Expand Down

0 comments on commit 028f23a

Please sign in to comment.