Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove references to RefResolver #1215

Merged
merged 18 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

## [Unreleased]

### Fixed

- Update usage of jsonschema ([#1215](https://github.com/stac-utils/pystac/pull/1215))

### Deprecated

- `pystac.validation.local_validator.LocalValidator` ([#1215](https://github.com/stac-utils/pystac/pull/1215))


## [v1.8.3] - 2023-07-12

### Added
Expand Down
8 changes: 2 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ test = [
"doc8~=1.1",
"html5lib~=1.1",
"jinja2<4.0",
"jsonschema>=4.0.1,<4.18",
"jsonschema~=4.18",
"mypy~=1.2",
"orjson~=3.8",
"pre-commit~=3.2",
Expand All @@ -64,8 +64,7 @@ test = [
"types-urllib3~=1.26",
]
urllib3 = ["urllib3>=1.26"]
# jsonschema v4.18.2 breaks validation, and it feels safer to set a ceiling rather than just skip this version. The ceiling should be removed when the v4.18 lineage has settled down and feels safer.
validation = ["jsonschema>=4.0.1,<4.18"]
validation = ["jsonschema~=4.18"]

[project.urls]
homepage = "https://github.com/stac-utils/pystac"
Expand All @@ -88,9 +87,6 @@ select = ["E", "F", "I"]
[tool.pytest.ini_options]
filterwarnings = [
"error",
# Allows jsonschema's RefResolver deprecation warning through until we're
# updated to support jsonschema v4.18
"default::DeprecationWarning:pystac.validation.*",
]

[build-system]
Expand Down
3 changes: 2 additions & 1 deletion pystac/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

import pystac
from pystac.serialization.identify import STACVersionID, identify_stac_object
from pystac.stac_object import STACObjectType
from pystac.utils import make_absolute_href
from pystac.validation.schema_uri_map import OldExtensionSchemaUriMap

if TYPE_CHECKING:
from pystac.stac_object import STACObject, STACObjectType
from pystac.stac_object import STACObject


# Import after above class definition
Expand Down
106 changes: 76 additions & 30 deletions pystac/validation/local_validator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import json
import sys
import warnings
from typing import Any, Dict, List, cast

from jsonschema import Draft7Validator, RefResolver, ValidationError
from jsonschema import Draft7Validator, ValidationError
from referencing import Registry, Resource

from pystac.errors import STACLocalValidationError
from pystac.version import STACVersion
Expand All @@ -13,27 +15,93 @@
from importlib.resources import files as importlib_resources_files

VERSION = STACVersion.DEFAULT_STAC_VERSION
ITEM_SCHEMA_URI = (


def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON schema bundled in ``pystac.validation.jsonschemas``.

    Args:
        file_name: Path of the schema file, relative to the
            ``pystac.validation.jsonschemas`` package.

    Returns:
        The parsed JSON document.
    """
    package = importlib_resources_files("pystac.validation.jsonschemas")
    # JSON is UTF-8 by specification; do not fall back to the locale encoding.
    with package.joinpath(file_name).open("r", encoding="utf-8") as f:
        return cast(Dict[str, Any], json.load(f))


def get_local_schema_cache() -> Dict[str, Dict[str, Any]]:
    """Build a fresh mapping of schema URI to the locally bundled schema.

    The mapping holds, in this order: the item, catalog and collection
    schemas for ``VERSION``; the GeoJSON ``Feature`` and ``Geometry``
    schemas; and the item-spec ``basics``, ``datetime``, ``instrument``,
    ``licensing`` and ``provider`` schemas for ``VERSION``.

    Returns:
        A new dictionary keyed by schema URI. Every call re-reads the
        schema files through ``_read_schema``.
    """
    cache: Dict[str, Dict[str, Any]] = {}

    # Core STAC object schemas.
    for name in ("item", "catalog", "collection"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"{name}-spec/json-schema/{name}.json"
        )
        cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    # GeoJSON schemas.
    for name in ("Feature", "Geometry"):
        uri = f"https://geojson.org/schema/{name}.json"
        cache[uri] = _read_schema(f"geojson/{name}.json")

    # Additional schemas published under the item-spec URI prefix.
    for name in ("basics", "datetime", "instrument", "licensing", "provider"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"item-spec/json-schema/{name}.json"
        )
        cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    return cache


############################### DEPRECATED #################################

_deprecated_ITEM_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json"
)
COLLECTION_SCHEMA_URI = (
_deprecated_COLLECTION_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/"
"collection-spec/json-schema/collection.json"
)
CATALOG_SCHEMA_URI = (
_deprecated_CATALOG_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json"
)

deprecated_names = ["ITEM_SCHEMA_URI", "COLLECTION_SCHEMA_URI", "CATALOG_SCHEMA_URI"]


def __getattr__(name: str) -> Any:
    """Module-level attribute hook that serves the deprecated constants.

    Args:
        name: The attribute that was looked up on this module and not found
            through normal lookup.

    Returns:
        The value of the module global ``_deprecated_<name>`` when ``name``
        is listed in ``deprecated_names``.

    Raises:
        AttributeError: If ``name`` is not one of the deprecated names.
    """
    if name in deprecated_names:
        # stacklevel=2 attributes the warning to the code that accessed the
        # deprecated constant instead of to this module.
        warnings.warn(
            f"{name} is deprecated and will be removed in v2.",
            FutureWarning,
            stacklevel=2,
        )
        return globals()[f"_deprecated_{name}"]
    raise AttributeError(f"module {__name__} has no attribute {name}")


class LocalValidator:
def __init__(self) -> None:
    """DEPRECATED

    Emits a ``DeprecationWarning`` and fills ``self.schema_cache`` with the
    result of ``get_local_schema_cache()``.
    """
    # stacklevel=2 points the warning at the code instantiating the class
    # rather than at this constructor.
    warnings.warn(
        "``LocalValidator`` is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    self.schema_cache = get_local_schema_cache()

@property
def registry(self) -> Any:
    """Registry built from every schema currently in ``self.schema_cache``.

    Exposed as a property because ``_validator`` passes ``self.registry``
    directly as the ``registry=`` argument of ``Draft7Validator``; as a plain
    method that argument would be a bound method instead of a ``Registry``.

    Returns:
        A new ``Registry`` holding one ``Resource`` per cached schema, keyed
        by the schema URI.
    """
    return Registry().with_resources(
        [
            (k, Resource.from_contents(v)) for k, v in self.schema_cache.items()
        ]  # type: ignore
    )

def _validate_from_local(
self, schema_uri: str, stac_dict: Dict[str, Any]
) -> List[ValidationError]:
if schema_uri == ITEM_SCHEMA_URI:
if schema_uri == _deprecated_ITEM_SCHEMA_URI:
validator = self.item_validator(VERSION)
elif schema_uri == COLLECTION_SCHEMA_URI:
elif schema_uri == _deprecated_COLLECTION_SCHEMA_URI:
validator = self.collection_validator(VERSION)
elif schema_uri == CATALOG_SCHEMA_URI:
elif schema_uri == _deprecated_CATALOG_SCHEMA_URI:
validator = self.catalog_validator(VERSION)
else:
raise STACLocalValidationError(
Expand All @@ -43,22 +111,7 @@ def _validate_from_local(

def _validator(self, stac_type: str, version: str) -> Draft7Validator:
schema = _read_schema(f"stac-spec/v{version}/{stac_type}.json")
resolver = RefResolver.from_schema(schema)
resolver.store[
f"https://schemas.stacspec.org/v{version}/collection-spec/json-schema/collection.json"
] = _read_schema(f"stac-spec/v{version}/collection.json")
resolver.store[
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/item.json"
] = _read_schema(f"stac-spec/v{version}/item.json")
for name in ("Feature", "Geometry"):
resolver.store[f"https://geojson.org/schema/{name}.json"] = _read_schema(
f"geojson/{name}.json"
)
for name in ("basics", "datetime", "instrument", "licensing", "provider"):
resolver.store[
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/{name}.json"
] = _read_schema(f"stac-spec/v{version}/{name}.json")
return Draft7Validator(schema, resolver=resolver)
return Draft7Validator(schema, registry=self.registry)

def catalog_validator(self, version: str = VERSION) -> Draft7Validator:
    """Return the validator that ``_validator`` builds for ``"catalog"``.

    Args:
        version: STAC version forwarded to ``_validator``; defaults to
            ``VERSION``.
    """
    validator = self._validator("catalog", version)
    return validator
Expand All @@ -68,10 +121,3 @@ def collection_validator(self, version: str = VERSION) -> Draft7Validator:

def item_validator(self, version: str = VERSION) -> Draft7Validator:
    """Return the validator that ``_validator`` builds for ``"item"``.

    Args:
        version: STAC version forwarded to ``_validator``; defaults to
            ``VERSION``.
    """
    validator = self._validator("item", version)
    return validator


def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON schema bundled in ``pystac.validation.jsonschemas``.

    Args:
        file_name: Path of the schema file, relative to the
            ``pystac.validation.jsonschemas`` package.

    Returns:
        The parsed JSON document.
    """
    package = importlib_resources_files("pystac.validation.jsonschemas")
    # JSON is UTF-8 by specification; do not fall back to the locale encoding.
    with package.joinpath(file_name).open("r", encoding="utf-8") as f:
        return cast(Dict[str, Any], json.load(f))
60 changes: 34 additions & 26 deletions pystac/validation/stac_validator.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import json
import logging
import warnings
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import pystac
import pystac.utils
from pystac.errors import STACLocalValidationError, STACValidationError
from pystac.errors import STACValidationError
from pystac.stac_object import STACObjectType
from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap

try:
import jsonschema
import jsonschema.exceptions
import jsonschema.validators
from referencing import Registry, Resource

from pystac.validation.local_validator import LocalValidator
from pystac.validation.local_validator import get_local_schema_cache

HAS_JSONSCHEMA = True
except ImportError:
Expand Down Expand Up @@ -149,20 +151,35 @@ def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None:
else:
self.schema_uri_map = DefaultSchemaUriMap()

self.schema_cache = {}
self.schema_cache = get_local_schema_cache()

def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
def _get_schema(self, schema_uri: str) -> Dict[str, Any]:
if schema_uri not in self.schema_cache:
s = json.loads(pystac.StacIO.default().read_text(schema_uri))
self.schema_cache[schema_uri] = s

schema = self.schema_cache[schema_uri]

resolver = jsonschema.validators.RefResolver(
base_uri=schema_uri, referrer=schema, store=self.schema_cache
id_field = "$id" if "$id" in s else "id"
if not s[id_field].startswith("http"):
s[id_field] = schema_uri
return self.schema_cache[schema_uri]

@property
def registry(self) -> Any:
    """Registry seeded from ``self.schema_cache`` with on-demand retrieval.

    Every access builds a new ``Registry``. URIs that are not among the
    seeded resources are resolved through ``self._get_schema``.
    """

    def retrieve(schema_uri: str) -> Resource[Dict[str, Any]]:
        # Wrap whatever ``_get_schema`` returns for this URI as a resource.
        fetched = self._get_schema(schema_uri)
        return Resource.from_contents(fetched)

    base_registry = Registry(retrieve=retrieve)  # type: ignore
    cached_resources = [
        (uri, Resource.from_contents(schema))
        for uri, schema in self.schema_cache.items()
    ]  # type: ignore
    return base_registry.with_resources(cached_resources)  # type: ignore

return schema, resolver
def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
    """DEPRECATED

    Args:
        schema_uri: URI of the schema to fetch.

    Returns:
        A two-tuple of the schema returned by ``self._get_schema(schema_uri)``
        and ``self.registry``.
    """
    # stacklevel=2 attributes the warning to the caller of this deprecated
    # method rather than to this method itself.
    warnings.warn(
        "get_schema_from_uri is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self._get_schema(schema_uri), self.registry

def _validate_from_uri(
self,
Expand All @@ -172,17 +189,13 @@ def _validate_from_uri(
href: Optional[str] = None,
) -> None:
try:
resolver = None
try:
errors = LocalValidator()._validate_from_local(schema_uri, stac_dict)
except STACLocalValidationError:
schema, resolver = self.get_schema_from_uri(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, resolver=resolver)
errors = list(validator.iter_errors(stac_dict))
schema = self._get_schema(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, registry=self.registry)
errors = list(validator.iter_errors(stac_dict))
except Exception as e:
logger.error(f"Exception while validating {stac_object_type} href: {href}")
logger.exception(e)
Expand All @@ -199,11 +212,6 @@ def _validate_from_uri(
best = jsonschema.exceptions.best_match(errors)
raise STACValidationError(msg, source=errors) from best

if resolver is not None:
for uri in resolver.store:
if uri not in self.schema_cache:
self.schema_cache[uri] = resolver.store[uri]

def validate_core(
self,
stac_dict: Dict[str, Any],
Expand Down
Loading