Skip to content

Commit

Permalink
Remove references to RefResolver (#1215)
Browse files Browse the repository at this point in the history
* Remove references to `RefResolver`

* Add dynamic fetching of remote schemas

* Try to parse relative refs

* Replace non-http ids

* Let registry handle relative ref links

* Undo error list changes

* Remove LocalValidator class and populate schema_cache with local schemas instead

* Fix linting

* Rewrite cassettes

* Rewrite cassettes

* Just rewrite cassettes that need rewriting

* Remove cast

* Importable even if jsonschema not importable

* Add back LocalValidator but make it deprecated

* Update changelog

* Deprecate global variables

---------

Co-authored-by: Pete Gadomski <pete.gadomski@gmail.com>
  • Loading branch information
jsignell and gadomski authored Sep 19, 2023
1 parent de6cfc2 commit e4a17c1
Show file tree
Hide file tree
Showing 6 changed files with 247 additions and 63 deletions.
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

## [Unreleased]

### Fixed

- Update usage of jsonschema ([#1215](https://github.com/stac-utils/pystac/pull/1215))

### Deprecated

- `pystac.validation.local_validator.LocalValidator` ([#1215](https://github.com/stac-utils/pystac/pull/1215))


## [v1.8.3] - 2023-07-12

### Added
Expand Down
8 changes: 2 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ test = [
"doc8~=1.1",
"html5lib~=1.1",
"jinja2<4.0",
"jsonschema>=4.0.1,<4.18",
"jsonschema~=4.18",
"mypy~=1.2",
"orjson~=3.8",
"pre-commit~=3.2",
Expand All @@ -64,8 +64,7 @@ test = [
"types-urllib3~=1.26",
]
urllib3 = ["urllib3>=1.26"]
# jsonschema v4.18.2 breaks validation, and it feels safer to set a ceiling rather than just skip this version. The ceiling should be removed when the v4.18 lineage has settled down and feels safer.
validation = ["jsonschema>=4.0.1,<4.18"]
validation = ["jsonschema~=4.18"]

[project.urls]
homepage = "https://github.com/stac-utils/pystac"
Expand All @@ -88,9 +87,6 @@ select = ["E", "F", "I"]
[tool.pytest.ini_options]
filterwarnings = [
"error",
# Allows jsonschema's RefResolver deprecation warning through until we're
# updated to support jsonschema v4.18
"default::DeprecationWarning:pystac.validation.*",
]

[build-system]
Expand Down
3 changes: 2 additions & 1 deletion pystac/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

import pystac
from pystac.serialization.identify import STACVersionID, identify_stac_object
from pystac.stac_object import STACObjectType
from pystac.utils import make_absolute_href
from pystac.validation.schema_uri_map import OldExtensionSchemaUriMap

if TYPE_CHECKING:
from pystac.stac_object import STACObject, STACObjectType
from pystac.stac_object import STACObject


# Import after above class definition
Expand Down
106 changes: 76 additions & 30 deletions pystac/validation/local_validator.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import json
import sys
import warnings
from typing import Any, Dict, List, cast

from jsonschema import Draft7Validator, RefResolver, ValidationError
from jsonschema import Draft7Validator, ValidationError
from referencing import Registry, Resource

from pystac.errors import STACLocalValidationError
from pystac.version import STACVersion
Expand All @@ -13,27 +15,93 @@
from importlib.resources import files as importlib_resources_files

VERSION = STACVersion.DEFAULT_STAC_VERSION
ITEM_SCHEMA_URI = (


def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON schema file shipped inside the pystac package.

    Args:
        file_name: Path of the schema file, relative to the
            ``pystac.validation.jsonschemas`` package.

    Returns:
        The decoded JSON content, typed as a dictionary.
    """
    schema_dir = importlib_resources_files("pystac.validation.jsonschemas")
    with schema_dir.joinpath(file_name).open("r") as schema_file:
        parsed = json.load(schema_file)
    return cast(Dict[str, Any], parsed)


def get_local_schema_cache() -> Dict[str, Dict[str, Any]]:
    """Build a schema cache keyed by URI from the schemas bundled with pystac.

    The returned mapping contains, in this order:

    * the core item, catalog and collection schemas for ``VERSION``,
    * the GeoJSON ``Feature`` and ``Geometry`` schemas,
    * the item-spec helper schemas (basics, datetime, instrument,
      licensing, provider) for ``VERSION``.

    Returns:
        A new dictionary mapping each schema URI to its parsed schema.
    """
    local_cache: Dict[str, Dict[str, Any]] = {}

    # Core STAC object schemas.
    for name in ("item", "catalog", "collection"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"{name}-spec/json-schema/{name}.json"
        )
        local_cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    # GeoJSON schemas.
    for name in ("Feature", "Geometry"):
        uri = f"https://geojson.org/schema/{name}.json"
        local_cache[uri] = _read_schema(f"geojson/{name}.json")

    # Helper schemas published under the item-spec path.
    for name in ("basics", "datetime", "instrument", "licensing", "provider"):
        uri = (
            f"https://schemas.stacspec.org/v{VERSION}/"
            f"item-spec/json-schema/{name}.json"
        )
        local_cache[uri] = _read_schema(f"stac-spec/v{VERSION}/{name}.json")

    return local_cache


############################### DEPRECATED #################################

_deprecated_ITEM_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json"
)
COLLECTION_SCHEMA_URI = (
_deprecated_COLLECTION_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/"
"collection-spec/json-schema/collection.json"
)
CATALOG_SCHEMA_URI = (
_deprecated_CATALOG_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json"
)

deprecated_names = ["ITEM_SCHEMA_URI", "COLLECTION_SCHEMA_URI", "CATALOG_SCHEMA_URI"]


def __getattr__(name: str) -> Any:
    """Module-level attribute hook that serves the deprecated constants.

    Python calls this only when normal module attribute lookup fails, so it
    is reached for the names listed in ``deprecated_names`` (whose values
    live under a ``_deprecated_`` prefix) and for names that do not exist.

    Args:
        name: The attribute name requested on this module.

    Returns:
        The value of the deprecated constant ``name``.

    Raises:
        AttributeError: If ``name`` is not one of the deprecated names.
    """
    if name in deprecated_names:
        # stacklevel=2 attributes the warning to the code that accessed the
        # deprecated name rather than to this hook.
        warnings.warn(
            f"{name} is deprecated and will be removed in v2.",
            FutureWarning,
            stacklevel=2,
        )
        return globals()[f"_deprecated_{name}"]
    raise AttributeError(f"module {__name__} has no attribute {name}")


class LocalValidator:
def __init__(self) -> None:
    """DEPRECATED

    Emits a ``DeprecationWarning`` and populates ``schema_cache`` with the
    locally bundled schemas.
    """
    # stacklevel=2 points the warning at the code instantiating the class
    # rather than at this constructor.
    warnings.warn(
        "``LocalValidator`` is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    self.schema_cache = get_local_schema_cache()

@property
def registry(self) -> Any:
    """Registry built from every schema currently in ``schema_cache``.

    This must be a property: ``_validator`` passes ``self.registry``
    straight to ``Draft7Validator(schema, registry=...)``, which needs a
    ``Registry`` instance and not a bound method.

    Returns:
        A ``referencing.Registry`` holding one resource per cached schema,
        keyed by the schema URI.
    """
    return Registry().with_resources(
        [
            (k, Resource.from_contents(v)) for k, v in self.schema_cache.items()
        ]  # type: ignore
    )

def _validate_from_local(
self, schema_uri: str, stac_dict: Dict[str, Any]
) -> List[ValidationError]:
if schema_uri == ITEM_SCHEMA_URI:
if schema_uri == _deprecated_ITEM_SCHEMA_URI:
validator = self.item_validator(VERSION)
elif schema_uri == COLLECTION_SCHEMA_URI:
elif schema_uri == _deprecated_COLLECTION_SCHEMA_URI:
validator = self.collection_validator(VERSION)
elif schema_uri == CATALOG_SCHEMA_URI:
elif schema_uri == _deprecated_CATALOG_SCHEMA_URI:
validator = self.catalog_validator(VERSION)
else:
raise STACLocalValidationError(
Expand All @@ -43,22 +111,7 @@ def _validate_from_local(

def _validator(self, stac_type: str, version: str) -> Draft7Validator:
schema = _read_schema(f"stac-spec/v{version}/{stac_type}.json")
resolver = RefResolver.from_schema(schema)
resolver.store[
f"https://schemas.stacspec.org/v{version}/collection-spec/json-schema/collection.json"
] = _read_schema(f"stac-spec/v{version}/collection.json")
resolver.store[
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/item.json"
] = _read_schema(f"stac-spec/v{version}/item.json")
for name in ("Feature", "Geometry"):
resolver.store[f"https://geojson.org/schema/{name}.json"] = _read_schema(
f"geojson/{name}.json"
)
for name in ("basics", "datetime", "instrument", "licensing", "provider"):
resolver.store[
f"https://schemas.stacspec.org/v{version}/item-spec/json-schema/{name}.json"
] = _read_schema(f"stac-spec/v{version}/{name}.json")
return Draft7Validator(schema, resolver=resolver)
return Draft7Validator(schema, registry=self.registry)

def catalog_validator(self, version: str = VERSION) -> Draft7Validator:
    """Return a validator for the bundled catalog schema of ``version``."""
    stac_type = "catalog"
    return self._validator(stac_type, version)
Expand All @@ -68,10 +121,3 @@ def collection_validator(self, version: str = VERSION) -> Draft7Validator:

def item_validator(self, version: str = VERSION) -> Draft7Validator:
    """Return a validator for the bundled item schema of ``version``."""
    stac_type = "item"
    return self._validator(stac_type, version)


def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON schema file shipped inside the pystac package.

    Args:
        file_name: Path of the schema file, relative to the
            ``pystac.validation.jsonschemas`` package.

    Returns:
        The decoded JSON content, typed as a dictionary.
    """
    schema_dir = importlib_resources_files("pystac.validation.jsonschemas")
    with schema_dir.joinpath(file_name).open("r") as schema_file:
        parsed = json.load(schema_file)
    return cast(Dict[str, Any], parsed)
60 changes: 34 additions & 26 deletions pystac/validation/stac_validator.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import json
import logging
import warnings
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import pystac
import pystac.utils
from pystac.errors import STACLocalValidationError, STACValidationError
from pystac.errors import STACValidationError
from pystac.stac_object import STACObjectType
from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap

try:
import jsonschema
import jsonschema.exceptions
import jsonschema.validators
from referencing import Registry, Resource

from pystac.validation.local_validator import LocalValidator
from pystac.validation.local_validator import get_local_schema_cache

HAS_JSONSCHEMA = True
except ImportError:
Expand Down Expand Up @@ -149,20 +151,35 @@ def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None:
else:
self.schema_uri_map = DefaultSchemaUriMap()

self.schema_cache = {}
self.schema_cache = get_local_schema_cache()

def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
def _get_schema(self, schema_uri: str) -> Dict[str, Any]:
if schema_uri not in self.schema_cache:
s = json.loads(pystac.StacIO.default().read_text(schema_uri))
self.schema_cache[schema_uri] = s

schema = self.schema_cache[schema_uri]

resolver = jsonschema.validators.RefResolver(
base_uri=schema_uri, referrer=schema, store=self.schema_cache
id_field = "$id" if "$id" in s else "id"
if not s[id_field].startswith("http"):
s[id_field] = schema_uri
return self.schema_cache[schema_uri]

@property
def registry(self) -> Any:
    """Registry seeded with the cached schemas and a retrieval fallback.

    Every schema in ``schema_cache`` is registered under its URI. The
    registry is also given a ``retrieve`` callback that loads a schema
    through ``_get_schema``.

    Returns:
        A ``referencing.Registry`` built from the current cache contents.
    """
    cached_resources = [
        (uri, Resource.from_contents(cached_schema))
        for uri, cached_schema in self.schema_cache.items()
    ]  # type: ignore

    def retrieve(schema_uri: str) -> Resource[Dict[str, Any]]:
        fetched = self._get_schema(schema_uri)
        return Resource.from_contents(fetched)

    base_registry = Registry(retrieve=retrieve)  # type: ignore
    return base_registry.with_resources(cached_resources)  # type: ignore

return schema, resolver
def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
    """DEPRECATED

    Args:
        schema_uri: URI of the schema to load.

    Returns:
        A tuple of the schema obtained from ``_get_schema`` and the
        validator's ``registry``.
    """
    # stacklevel=2 attributes the warning to the caller of this deprecated
    # method rather than to the method itself.
    warnings.warn(
        "get_schema_from_uri is deprecated and will be removed in v2.",
        DeprecationWarning,
        stacklevel=2,
    )
    return self._get_schema(schema_uri), self.registry

def _validate_from_uri(
self,
Expand All @@ -172,17 +189,13 @@ def _validate_from_uri(
href: Optional[str] = None,
) -> None:
try:
resolver = None
try:
errors = LocalValidator()._validate_from_local(schema_uri, stac_dict)
except STACLocalValidationError:
schema, resolver = self.get_schema_from_uri(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, resolver=resolver)
errors = list(validator.iter_errors(stac_dict))
schema = self._get_schema(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, registry=self.registry)
errors = list(validator.iter_errors(stac_dict))
except Exception as e:
logger.error(f"Exception while validating {stac_object_type} href: {href}")
logger.exception(e)
Expand All @@ -199,11 +212,6 @@ def _validate_from_uri(
best = jsonschema.exceptions.best_match(errors)
raise STACValidationError(msg, source=errors) from best

if resolver is not None:
for uri in resolver.store:
if uri not in self.schema_cache:
self.schema_cache[uri] = resolver.store[uri]

def validate_core(
self,
stac_dict: Dict[str, Any],
Expand Down
Loading

0 comments on commit e4a17c1

Please sign in to comment.