Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove references to RefResolver #1215

Merged
merged 18 commits into from
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 2 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ test = [
"doc8~=1.1",
"html5lib~=1.1",
"jinja2<4.0",
"jsonschema>=4.0.1,<4.18",
"jsonschema~=4.18",
"mypy~=1.2",
"orjson~=3.8",
"pre-commit~=3.2",
Expand All @@ -64,8 +64,7 @@ test = [
"types-urllib3~=1.26",
]
urllib3 = ["urllib3>=1.26"]
# jsonschema v4.18.2 breaks validation, and it feels safer to set a ceiling rather than just skip this version. The ceiling should be removed when the v4.18 lineage has settled down and feels safer.
validation = ["jsonschema>=4.0.1,<4.18"]
validation = ["jsonschema~=4.18"]

[project.urls]
homepage = "https://github.com/stac-utils/pystac"
Expand All @@ -88,9 +87,6 @@ select = ["E", "F", "I"]
[tool.pytest.ini_options]
filterwarnings = [
"error",
# Allows jsonschema's RefResolver deprecation warning through until we're
# updated to support jsonschema v4.18
"default::DeprecationWarning:pystac.validation.*",
]

[build-system]
Expand Down
3 changes: 2 additions & 1 deletion pystac/validation/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,12 @@

import pystac
from pystac.serialization.identify import STACVersionID, identify_stac_object
from pystac.stac_object import STACObjectType
from pystac.utils import make_absolute_href
from pystac.validation.schema_uri_map import OldExtensionSchemaUriMap

if TYPE_CHECKING:
from pystac.stac_object import STACObject, STACObjectType
from pystac.stac_object import STACObject


# Import after above class definition
Expand Down
91 changes: 32 additions & 59 deletions pystac/validation/local_validator.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import json
import sys
from typing import Any, Dict, List, cast
from typing import Any, Dict, cast

from jsonschema import Draft7Validator, RefResolver, ValidationError

from pystac.errors import STACLocalValidationError
from pystac.version import STACVersion

if sys.version_info[:2] < (3, 9):
Expand All @@ -13,65 +10,41 @@
from importlib.resources import files as importlib_resources_files

VERSION = STACVersion.DEFAULT_STAC_VERSION
ITEM_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/item-spec/json-schema/item.json"
)
COLLECTION_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/"
"collection-spec/json-schema/collection.json"
)
CATALOG_SCHEMA_URI = (
f"https://schemas.stacspec.org/v{VERSION}/catalog-spec/json-schema/catalog.json"
)


class LocalValidator:
    """Builds Draft 7 validators from the JSON schemas bundled with the package."""

    def _validate_from_local(
        self, schema_uri: str, stac_dict: Dict[str, Any]
    ) -> List[ValidationError]:
        """Validate ``stac_dict`` against a locally bundled core schema.

        Args:
            schema_uri: URI of the schema to validate against. Only the item,
                collection and catalog schema URIs for ``VERSION`` are known.
            stac_dict: The dictionary to validate.

        Returns:
            Every validation error reported by the validator (empty if none).

        Raises:
            STACLocalValidationError: ``schema_uri`` is not one of the known
                URIs.
        """
        factories = (
            (ITEM_SCHEMA_URI, self.item_validator),
            (COLLECTION_SCHEMA_URI, self.collection_validator),
            (CATALOG_SCHEMA_URI, self.catalog_validator),
        )
        for known_uri, make_validator in factories:
            if schema_uri == known_uri:
                return list(make_validator(VERSION).iter_errors(stac_dict))
        raise STACLocalValidationError(
            f"Schema not available locally: {schema_uri}"
        )

    def _validator(self, stac_type: str, version: str) -> Draft7Validator:
        """Create a validator for ``stac_type`` with a pre-filled resolver store.

        The resolver store is seeded with the bundled collection, item,
        GeoJSON and item-spec helper schemas, keyed by their public URIs --
        presumably so those references resolve from the bundled copies.
        """
        root_schema = _read_schema(f"stac-spec/v{version}/{stac_type}.json")
        resolver = RefResolver.from_schema(root_schema)
        store = resolver.store
        spec_root = f"https://schemas.stacspec.org/v{version}"

        for kind in ("collection", "item"):
            store[f"{spec_root}/{kind}-spec/json-schema/{kind}.json"] = _read_schema(
                f"stac-spec/v{version}/{kind}.json"
            )
        for geojson_name in ("Feature", "Geometry"):
            store[f"https://geojson.org/schema/{geojson_name}.json"] = _read_schema(
                f"geojson/{geojson_name}.json"
            )
        for part in ("basics", "datetime", "instrument", "licensing", "provider"):
            store[f"{spec_root}/item-spec/json-schema/{part}.json"] = _read_schema(
                f"stac-spec/v{version}/{part}.json"
            )

        return Draft7Validator(root_schema, resolver=resolver)

    def catalog_validator(self, version: str = VERSION) -> Draft7Validator:
        """Return a validator for the catalog schema of ``version``."""
        return self._validator("catalog", version)

    def collection_validator(self, version: str = VERSION) -> Draft7Validator:
        """Return a validator for the collection schema of ``version``."""
        return self._validator("collection", version)

    def item_validator(self, version: str = VERSION) -> Draft7Validator:
        """Return a validator for the item schema of ``version``."""
        return self._validator("item", version)

def _read_schema(file_name: str) -> Dict[str, Any]:
    """Load a JSON file from the ``pystac.validation.jsonschemas`` package.

    Args:
        file_name: Path of the file, relative to the package's resources.

    Returns:
        The parsed JSON content.
    """
    schema_resource = importlib_resources_files(
        "pystac.validation.jsonschemas"
    ).joinpath(file_name)
    with schema_resource.open("r") as schema_file:
        parsed = json.load(schema_file)
    return cast(Dict[str, Any], parsed)


def get_local_schema_cache() -> Dict[str, Dict[str, Any]]:
    """Map schema URIs to the copies of those schemas bundled with the package.

    Covers, for ``VERSION``, the core item/catalog/collection schemas, the
    GeoJSON ``Feature`` and ``Geometry`` schemas, and the item-spec helper
    schemas (basics, datetime, instrument, licensing, provider).

    Returns:
        A new dictionary keyed by schema URI, with parsed schemas as values.
    """
    stac_root = f"https://schemas.stacspec.org/v{VERSION}"
    local_dir = f"stac-spec/v{VERSION}"
    cache: Dict[str, Dict[str, Any]] = {}

    # Core STAC object schemas.
    for name in ("item", "catalog", "collection"):
        cache[f"{stac_root}/{name}-spec/json-schema/{name}.json"] = _read_schema(
            f"{local_dir}/{name}.json"
        )

    # GeoJSON schemas.
    for name in ("Feature", "Geometry"):
        cache[f"https://geojson.org/schema/{name}.json"] = _read_schema(
            f"geojson/{name}.json"
        )

    # Helper schemas published under the item-spec path.
    for name in ("basics", "datetime", "instrument", "licensing", "provider"):
        cache[f"{stac_root}/item-spec/json-schema/{name}.json"] = _read_schema(
            f"{local_dir}/{name}.json"
        )

    return cache
60 changes: 34 additions & 26 deletions pystac/validation/stac_validator.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,22 @@
import json
import logging
import warnings
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Tuple

import pystac
import pystac.utils
from pystac.errors import STACLocalValidationError, STACValidationError
from pystac.errors import STACValidationError
from pystac.stac_object import STACObjectType
from pystac.validation.schema_uri_map import DefaultSchemaUriMap, SchemaUriMap

try:
import jsonschema
import jsonschema.exceptions
import jsonschema.validators
from referencing import Registry, Resource

from pystac.validation.local_validator import LocalValidator
from pystac.validation.local_validator import get_local_schema_cache

HAS_JSONSCHEMA = True
except ImportError:
Expand Down Expand Up @@ -149,20 +151,35 @@ def __init__(self, schema_uri_map: Optional[SchemaUriMap] = None) -> None:
else:
self.schema_uri_map = DefaultSchemaUriMap()

self.schema_cache = {}
self.schema_cache = get_local_schema_cache()

def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
def _get_schema(self, schema_uri: str) -> Dict[str, Any]:
if schema_uri not in self.schema_cache:
s = json.loads(pystac.StacIO.default().read_text(schema_uri))
self.schema_cache[schema_uri] = s

schema = self.schema_cache[schema_uri]

resolver = jsonschema.validators.RefResolver(
base_uri=schema_uri, referrer=schema, store=self.schema_cache
id_field = "$id" if "$id" in s else "id"
if not s[id_field].startswith("http"):
s[id_field] = schema_uri
return self.schema_cache[schema_uri]

@property
def registry(self) -> Any:
def retrieve(schema_uri: str) -> Resource[Dict[str, Any]]:
return Resource.from_contents(self._get_schema(schema_uri))

return Registry(retrieve=retrieve).with_resources( # type: ignore
[
(k, Resource.from_contents(v)) for k, v in self.schema_cache.items()
] # type: ignore
gadomski marked this conversation as resolved.
Show resolved Hide resolved
)

return schema, resolver
def get_schema_from_uri(self, schema_uri: str) -> Tuple[Dict[str, Any], Any]:
    """Return the schema for ``schema_uri`` along with this validator's registry.

    .. deprecated::
        Emits a ``DeprecationWarning``; per that warning, this method will be
        removed in v2.

    Args:
        schema_uri: URI of the schema to look up, passed to ``_get_schema``.

    Returns:
        A ``(schema, registry)`` tuple built from
        ``self._get_schema(schema_uri)`` and ``self.registry``.
    """
    warnings.warn(
        "get_schema_from_uri is deprecated and will be removed in v2.",
        DeprecationWarning,
        # Attribute the warning to the caller so users see which of their own
        # lines needs updating, rather than this library line.
        stacklevel=2,
    )
    return self._get_schema(schema_uri), self.registry

def _validate_from_uri(
self,
Expand All @@ -172,17 +189,13 @@ def _validate_from_uri(
href: Optional[str] = None,
) -> None:
try:
resolver = None
try:
errors = LocalValidator()._validate_from_local(schema_uri, stac_dict)
except STACLocalValidationError:
schema, resolver = self.get_schema_from_uri(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, resolver=resolver)
errors = list(validator.iter_errors(stac_dict))
schema = self._get_schema(schema_uri)
# This block is cribbed (w/ change in error handling) from
# jsonschema.validate
cls = jsonschema.validators.validator_for(schema)
cls.check_schema(schema)
validator = cls(schema, registry=self.registry)
errors = list(validator.iter_errors(stac_dict))
except Exception as e:
logger.error(f"Exception while validating {stac_object_type} href: {href}")
logger.exception(e)
Expand All @@ -199,11 +212,6 @@ def _validate_from_uri(
best = jsonschema.exceptions.best_match(errors)
raise STACValidationError(msg, source=errors) from best

if resolver is not None:
for uri in resolver.store:
if uri not in self.schema_cache:
self.schema_cache[uri] = resolver.store[uri]

def validate_core(
self,
stac_dict: Dict[str, Any],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -640,4 +640,128 @@ interactions:
status:
code: 200
message: OK
- request:
body: null
headers:
Connection:
- close
Host:
- stac-extensions.github.io
User-Agent:
- Python-urllib/3.9
method: GET
uri: https://stac-extensions.github.io/projection/v1.0.0/schema.json
response:
body:
string: "{\n \"$schema\": \"http://json-schema.org/draft-07/schema#\",\n \"$id\":
\"https://stac-extensions.github.io/projection/v1.0.0/schema.json\",\n \"title\":
\"Projection Extension\",\n \"description\": \"STAC Projection Extension
for STAC Items.\",\n \"oneOf\": [\n {\n \"$comment\": \"This is the
schema for STAC Items.\",\n \"allOf\": [\n {\n \"type\":
\"object\",\n \"required\": [\n \"type\",\n \"properties\",\n
\ \"assets\"\n ],\n \"properties\": {\n \"type\":
{\n \"const\": \"Feature\"\n },\n \"properties\":
{\n \"allOf\": [\n {\n \"$comment\":
\"Require fields here for item properties.\",\n \"required\":
[\n \"proj:epsg\"\n ]\n },\n
\ {\n \"$ref\": \"#/definitions/fields\"\n
\ }\n ]\n },\n \"assets\":
{\n \"type\": \"object\",\n \"additionalProperties\":
{\n \"$ref\": \"#/definitions/fields\"\n }\n }\n
\ }\n },\n {\n \"$ref\": \"#/definitions/stac_extensions\"\n
\ }\n ]\n },\n {\n \"$comment\": \"This is the schema
for STAC Collections.\",\n \"allOf\": [\n {\n \"type\":
\"object\",\n \"required\": [\n \"type\"\n ],\n
\ \"properties\": {\n \"type\": {\n \"const\":
\"Collection\"\n },\n \"assets\": {\n \"type\":
\"object\",\n \"additionalProperties\": {\n \"$ref\":
\"#/definitions/fields\"\n }\n },\n \"item_assets\":
{\n \"type\": \"object\",\n \"additionalProperties\":
{\n \"$ref\": \"#/definitions/fields\"\n }\n }\n
\ }\n },\n {\n \"$ref\": \"#/definitions/stac_extensions\"\n
\ }\n ]\n }\n ],\n \"definitions\": {\n \"stac_extensions\":
{\n \"type\": \"object\",\n \"required\": [\n \"stac_extensions\"\n
\ ],\n \"properties\": {\n \"stac_extensions\": {\n \"type\":
\"array\",\n \"contains\": {\n \"const\": \"https://stac-extensions.github.io/projection/v1.0.0/schema.json\"\n
\ }\n }\n }\n },\n \"fields\": {\n \"$comment\":
\"Add your new fields here. Don't require them here, do that above in the
item schema.\",\n \"type\": \"object\",\n \"properties\": {\n \"proj:epsg\":{\n
\ \"title\":\"EPSG code\",\n \"type\":[\n \"integer\",\n
\ \"null\"\n ]\n },\n \"proj:wkt2\":{\n \"title\":\"Coordinate
Reference System in WKT2 format\",\n \"type\":[\n \"string\",\n
\ \"null\"\n ]\n },\n \"proj:projjson\":
{\n \"title\":\"Coordinate Reference System in PROJJSON format\",\n
\ \"oneOf\": [\n {\n \"$ref\": \"https://proj.org/schemas/v0.2/projjson.schema.json\"\n
\ },\n {\n \"type\": \"null\"\n }\n
\ ]\n },\n \"proj:geometry\":{\n \"$ref\":
\"https://geojson.org/schema/Geometry.json\"\n },\n \"proj:bbox\":{\n
\ \"title\":\"Extent\",\n \"type\":\"array\",\n \"oneOf\":
[\n {\n \"minItems\":4,\n \"maxItems\":4\n
\ },\n {\n \"minItems\":6,\n \"maxItems\":6\n
\ }\n ],\n \"items\":{\n \"type\":\"number\"\n
\ }\n },\n \"proj:centroid\":{\n \"title\":\"Centroid\",\n
\ \"type\":\"object\",\n \"required\": [\n \"lat\",\n
\ \"lon\"\n ],\n \"properties\": {\n \"lat\":
{\n \"type\": \"number\",\n \"minimum\": -90,\n
\ \"maximum\": 90\n },\n \"lon\": {\n \"type\":
\"number\",\n \"minimum\": -180,\n \"maximum\":
180\n }\n }\n },\n \"proj:shape\":{\n \"title\":\"Shape\",\n
\ \"type\":\"array\",\n \"minItems\":2,\n \"maxItems\":2,\n
\ \"items\":{\n \"type\":\"integer\"\n }\n },\n
\ \"proj:transform\":{\n \"title\":\"Transform\",\n \"type\":\"array\",\n
\ \"oneOf\": [\n {\n \"minItems\":6,\n \"maxItems\":6\n
\ },\n {\n \"minItems\":9,\n \"maxItems\":9\n
\ }\n ],\n \"items\":{\n \"type\":\"number\"\n
\ }\n }\n },\n \"patternProperties\": {\n \"^(?!proj:)\":
{}\n },\n \"additionalProperties\": false\n }\n }\n}"
headers:
Accept-Ranges:
- bytes
Access-Control-Allow-Origin:
- '*'
Age:
- '0'
Cache-Control:
- max-age=600
Connection:
- close
Content-Length:
- '4646'
Content-Type:
- application/json; charset=utf-8
Date:
- Wed, 13 Sep 2023 18:50:52 GMT
ETag:
- '"63e6651b-1226"'
Last-Modified:
- Fri, 10 Feb 2023 15:39:07 GMT
Server:
- github.com
Strict-Transport-Security:
- max-age=31556952
Vary:
- Accept-Encoding
Via:
- 1.1 varnish
X-Cache:
- HIT
X-Cache-Hits:
- '1'
X-Fastly-Request-ID:
- a0eb45653d9d666c06c2a07a8c6ed0eecd22bbe4
X-GitHub-Request-Id:
- 3F32:105C:90E8A3:BBFF05:6501E745
X-Served-By:
- cache-bos4626-BOS
X-Timer:
- S1694631052.253405,VS0,VE33
expires:
- Wed, 13 Sep 2023 16:55:59 GMT
permissions-policy:
- interest-cohort=()
x-proxy-cache:
- MISS
status:
code: 200
message: OK
version: 1