Skip to content

Commit

Permalink
Merge pull request #402 from duckontheweb/340-non-stac-error
Browse files Browse the repository at this point in the history
Raise exception in identify_stac_object for non-STAC objects
  • Loading branch information
Jon Duckworth authored Jun 9, 2021
2 parents eb15669 + 36303f6 commit 8789513
Show file tree
Hide file tree
Showing 20 changed files with 128 additions and 3,293 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ stdout*
.idea
.vscode


# Sphinx documentation
.ipynb_checkpoints/

Expand Down
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@

### Fixed

- Fixed returned None by `EOExtension.get_bands` for asset without EO bands ([#406](https://github.com/stac-utils/pystac/pull/406))
- `EOExtension.get_bands` returns `None` for asset without EO bands ([#406](https://github.com/stac-utils/pystac/pull/406))
- `identify_stac_object_type` returns `None` and `identify_stac_object` raises `STACTypeError` for non-STAC objects
([#402](https://github.com/stac-utils/pystac/pull/402))

### Removed

Expand Down
31 changes: 0 additions & 31 deletions pystac/extensions/eo.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
https://github.com/stac-extensions/eo
"""

import re
from typing import (
Any,
Dict,
Expand Down Expand Up @@ -498,36 +497,6 @@ class EOExtensionHooks(ExtensionHooks):
def migrate(
self, obj: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
) -> None:
if version < "0.5":
if "eo:crs" in obj["properties"]:
# Try to pull out the EPSG code.
# Otherwise, just leave it alone.
wkt = obj["properties"]["eo:crs"]
matches = list(re.finditer(r'AUTHORITY\[[^\]]*\"(\d+)"\]', wkt))
if len(matches) > 0:
epsg_code = matches[-1].group(1)
obj["properties"].pop("eo:crs")
obj["properties"]["eo:epsg"] = int(epsg_code)

if version < "0.6":
# Change eo:bands from a dict to a list. eo:bands on an asset
# is an index instead of a dict key. eo:bands is in properties.
bands_dict = obj["eo:bands"]
keys_to_indices: Dict[str, int] = {}
bands: List[Dict[str, Any]] = []
for i, (k, band) in enumerate(bands_dict.items()):
keys_to_indices[k] = i
bands.append(band)

obj.pop("eo:bands")
obj["properties"]["eo:bands"] = bands
for k, asset in obj["assets"].items():
if "eo:bands" in asset:
asset_band_indices: List[int] = []
for bk in asset["eo:bands"]:
asset_band_indices.append(keys_to_indices[bk])
asset["eo:bands"] = sorted(asset_band_indices)

if version < "0.9":
# Some eo fields became common_metadata
if (
Expand Down
206 changes: 54 additions & 152 deletions pystac/serialization/identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,107 +185,12 @@ def _identify_stac_extensions(

# checksum
if "links" in d:
found_checksum = False
for link in d["links"]:
# Account for old links as dicts
if isinstance(link, str):
link_props = cast(Dict[str, Any], d["links"][link]).keys()
else:
link_props = cast(Dict[str, Any], link).keys()
link_props = cast(Dict[str, Any], link).keys()

if any(prop.startswith("checksum:") for prop in link_props):
found_checksum = True
stac_extensions.add(OldExtensionShortIDs.CHECKSUM.value)
if not found_checksum:
if "assets" in d:
for asset in d["assets"].values():
asset_props = cast(Dict[str, Any], asset).keys()
if any(prop.startswith("checksum:") for prop in asset_props):
found_checksum = True
stac_extensions.add(OldExtensionShortIDs.CHECKSUM.value)
if found_checksum:
version_range.set_min(STACVersionID("0.6.2"))

# datacube
if object_type == pystac.STACObjectType.ITEM:
if any(k.startswith("cube:") for k in cast(Dict[str, Any], d["properties"])):
stac_extensions.add(OldExtensionShortIDs.DATACUBE.value)
version_range.set_min(STACVersionID("0.6.1"))

# datetime-range (old extension)
if object_type == pystac.STACObjectType.ITEM:
if "dtr:start_datetime" in d["properties"]:
stac_extensions.add("datetime-range")
version_range.set_min(STACVersionID("0.6.0"))

# eo
if object_type == pystac.STACObjectType.ITEM:
if any(k.startswith("eo:") for k in cast(Dict[str, Any], d["properties"])):
stac_extensions.add(OldExtensionShortIDs.EO.value)
if "eo:epsg" in d["properties"]:
if d["properties"]["eo:epsg"] is None:
version_range.set_min(STACVersionID("0.6.1"))
if "eo:crs" in d["properties"]:
version_range.set_max(STACVersionID("0.4.1"))
if "eo:constellation" in d["properties"]:
version_range.set_min(STACVersionID("0.6.0"))
if "eo:bands" in d:
stac_extensions.add(OldExtensionShortIDs.EO.value)
version_range.set_max(STACVersionID("0.5.2"))

# pointcloud
if object_type == pystac.STACObjectType.ITEM:
if any(k.startswith("pc:") for k in cast(Dict[str, Any], d["properties"])):
stac_extensions.add(OldExtensionShortIDs.POINTCLOUD.value)
version_range.set_min(STACVersionID("0.6.2"))

# sar
if object_type == pystac.STACObjectType.ITEM:
if any(k.startswith("sar:") for k in cast(Dict[str, Any], d["properties"])):
stac_extensions.add(OldExtensionShortIDs.SAR.value)
version_range.set_min(STACVersionID("0.6.2"))
if version_range.contains("0.6.2"):
for prop in [
"sar:absolute_orbit",
"sar:resolution",
"sar:pixel_spacing",
"sar:looks",
]:
if prop in d["properties"]:
if isinstance(d["properties"][prop], list):
version_range.set_max(STACVersionID("0.6.2"))
if version_range.contains("0.7.0"):
for prop in [
"sar:incidence_angle",
"sar:relative_orbit",
"sar:observation_direction",
"sar:resolution_range",
"sar:resolution_azimuth",
"sar:pixel_spacing_range",
"sar:pixel_spacing_azimuth",
"sar:looks_range",
"sar:looks_azimuth",
"sar:looks_equivalent_number",
]:
if prop in d["properties"]:
version_range.set_min(STACVersionID("0.7.0"))
if "sar:absolute_orbit" in d["properties"] and not isinstance(
d["properties"]["sar:absolute_orbit"], list
):
version_range.set_min(STACVersionID("0.7.0"))
if "sar:off_nadir" in d["properties"]:
version_range.set_max(STACVersionID("0.6.2"))

# scientific
if (
object_type == pystac.STACObjectType.ITEM
or object_type == pystac.STACObjectType.COLLECTION
):
if "properties" in d:
prop_keys = cast(Dict[str, Any], d["properties"]).keys()
if any(k.startswith("sci:") for k in prop_keys):
stac_extensions.add(OldExtensionShortIDs.SCIENTIFIC.value)
version_range.set_min(STACVersionID("0.6.0"))
version_range.set_min(STACVersionID("0.6.2"))

# Single File STAC
if object_type == pystac.STACObjectType.ITEMCOLLECTION:
Expand All @@ -298,40 +203,59 @@ def _identify_stac_extensions(
return list(stac_extensions)


def identify_stac_object_type(json_dict: Dict[str, Any]) -> "STACObjectType_Type":
"""Determines the STACObjectType of the provided JSON dict.
def identify_stac_object_type(
json_dict: Dict[str, Any]
) -> Optional["STACObjectType_Type"]:
"""Determines the STACObjectType of the provided JSON dict. If the JSON dict does
not represent a STAC object, returns ``None``.
Args:
json_dict : The dict of STAC JSON to identify.
Will first try to identify the object using the ``"type"`` field, as described in the
guidelines in :stac-spec:`How to Differentiate STAC Files
<best-practices.md#how-to-differentiate-stac-files>`. If this fails, will fall back
to using the pre-1.0 heuristic described in `this issue
<https://github.com/radiantearth/stac-spec/issues/889#issuecomment-684529444>`__.
Returns:
STACObjectType: The object type represented by the JSON.
Args:
json_dict : The dict of JSON to identify.
"""
object_type = None

if "type" in json_dict: # Try to identify using 'type' property
# Try to identify using 'type' property, if present
if "type" in json_dict:
# Try to find 'type' property in known STACObjectType values
for t in pystac.STACObjectType:
if json_dict["type"].lower() == t.value.lower():
object_type = t
break

if object_type is None: # Use old-approach based on other properties
# Identify pre-1.0 ITEMCOLLECTION (since removed)
if "type" in json_dict and "assets" not in json_dict:
if "stac_version" in json_dict and json_dict["stac_version"].startswith(
"0"
):
if json_dict["type"] == "FeatureCollection":
object_type = pystac.STACObjectType.ITEMCOLLECTION

if "extent" in json_dict:
object_type = pystac.STACObjectType.COLLECTION
elif "assets" in json_dict:
object_type = pystac.STACObjectType.ITEM
return t

obj_type = json_dict.get("type")

# For pre-1.0 objects of version 0.8.* or later, 'stac_version' must be present,
# except in ItemCollections (which are handled in the else clause)
if "stac_version" in json_dict:
# Pre-1.0 STAC objects with 'type' == "Feature" are Items
if obj_type == "Feature":
return pystac.STACObjectType.ITEM
# Pre-1.0 STAC objects with 'type' == "FeatureCollection" are ItemCollections
if obj_type == "FeatureCollection":
return pystac.STACObjectType.ITEMCOLLECTION
# Anything else with a 'type' field is not a STAC object
if obj_type is not None:
return None

# Collections will contain either an 'extent' or a 'license' (or both)
if "extent" in json_dict or "license" in json_dict:
return pystac.STACObjectType.COLLECTION
# Everything else that has a stac_version is a Catalog
else:
object_type = pystac.STACObjectType.CATALOG

return object_type
return pystac.STACObjectType.CATALOG
else:
# Prior to STAC 0.9, ItemCollections did not have a stac_version field and could
# only be identified by the fact that all of their 'features' are STAC Items.
if obj_type == "FeatureCollection":
if all(
identify_stac_object_type(feat) == pystac.STACObjectType.ITEM
for feat in json_dict.get("features", [])
):
return pystac.STACObjectType.ITEMCOLLECTION
return None


def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
Expand All @@ -346,21 +270,16 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
"""
object_type = identify_stac_object_type(json_dict)

if object_type is None:
raise pystac.STACTypeError("JSON does not represent a STAC object.")

version_range = STACVersionRange()

stac_version = json_dict.get("stac_version")
stac_extensions = json_dict.get("stac_extensions", None)

if stac_version is None:
if (
object_type == pystac.STACObjectType.CATALOG
or object_type == pystac.STACObjectType.COLLECTION
):
version_range.set_max(STACVersionID("0.5.2"))
elif object_type == pystac.STACObjectType.ITEM:
version_range.set_max(STACVersionID("0.7.0"))
else: # ItemCollection
version_range.set_min(STACVersionID("0.8.0"))
version_range.set_min(STACVersionID("0.8.0"))
else:
version_range.set_to_single(stac_version)

Expand All @@ -372,7 +291,7 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
# if the stac_extensions property doesn't exist for everything
# but ItemCollection (except after 0.9.0, when ItemCollection also got
# the stac_extensions property).
if version_range.is_earlier_than("0.8.0") or (
if (
object_type == pystac.STACObjectType.ITEMCOLLECTION
and not version_range.is_later_than("0.8.1")
):
Expand All @@ -390,21 +309,4 @@ def identify_stac_object(json_dict: Dict[str, Any]) -> STACJSONDescription:
# code translates the short name IDs used pre-1.0.0-RC1 to the
# relevant extension schema uri identifier.

if not version_range.is_single_version():
# Final Checks

if "links" in json_dict:
# links were a dictionary only in 0.5
if "links" in json_dict and isinstance(json_dict["links"], dict):
version_range.set_to_single(STACVersionID("0.5.2"))

# self links became non-required in 0.7.0
if not version_range.is_earlier_than("0.7.0") and not any(
filter(
lambda l: cast(Dict[str, Any], l)["rel"] == pystac.RelType.SELF,
json_dict["links"],
)
):
version_range.set_min(STACVersionID("0.7.0"))

return STACJSONDescription(object_type, version_range, set(stac_extensions))
15 changes: 2 additions & 13 deletions pystac/serialization/migrate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,9 @@
from pystac import STACObjectType as STACObjectType_Type


def _migrate_links(d: Dict[str, Any], version: STACVersionID) -> None:
if version < "0.6":
if "links" in d:
if isinstance(d["links"], dict):
d["links"] = list(d["links"].values())


def _migrate_catalog(
d: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
) -> None:
_migrate_links(d, version)

if version < "0.8":
d["stac_extensions"] = list(info.extensions)

Expand All @@ -38,10 +29,8 @@ def _migrate_collection(
def _migrate_item(
d: Dict[str, Any], version: STACVersionID, info: STACJSONDescription
) -> None:
_migrate_links(d, version)

if version < "0.8":
d["stac_extensions"] = list(info.extensions)
# No migrations necessary for supported STAC versions (>=0.8)
pass


def _migrate_itemcollection(
Expand Down
Loading

0 comments on commit 8789513

Please sign in to comment.