Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pick the deepest error among the most relevant ones in each separate subschema #1300

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 13 additions & 3 deletions jsonschema/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,6 @@ def relevance(error):
validator = error.validator
return ( # prefer errors which are ...
-len(error.path), # 'deeper' and thereby more specific
error.path, # earlier (for sibling errors)
validator not in weak, # for a non-low-priority keyword
validator in strong, # for a high priority keyword
not error._matches_type(), # at least match the instance's type
Expand Down Expand Up @@ -427,7 +426,8 @@ def best_match(errors, key=relevance):
since they indicate "more" is wrong with the instance.

If the resulting match is either :kw:`oneOf` or :kw:`anyOf`, the
*opposite* assumption is made -- i.e. the deepest error is picked,
*opposite* assumption is made -- i.e. the deepest error is picked
among the most relevant errors in each separate subschema,
since these keywords only need to match once, and any other errors
may not be relevant.

Expand Down Expand Up @@ -464,9 +464,19 @@ def best_match(errors, key=relevance):
best = max(itertools.chain([best], errors), key=key)

while best.context:
# Calculate the most relevant error in each separate subschema
best_in_subschemas = []
for error in best.context:
index = error.schema_path[0]
if index == len(best_in_subschemas):
best_in_subschemas.append(error)
else:
prev = best_in_subschemas[index]
best_in_subschemas[index] = max(prev, error, key=key)

# Calculate the minimum via nsmallest, because we don't recurse if
# all nested errors have the same relevance (i.e. if min == max == all)
smallest = heapq.nsmallest(2, best.context, key=key)
smallest = heapq.nsmallest(2, best_in_subschemas, key=key)
if len(smallest) == 2 and key(smallest[0]) == key(smallest[1]): # noqa: PLR2004
return best
best = smallest[0]
Expand Down
2 changes: 1 addition & 1 deletion jsonschema/tests/test_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ def test_RefResolver(self):
self.assertEqual(w.filename, __file__)

with self.assertWarnsRegex(DeprecationWarning, message) as w:
from jsonschema.validators import RefResolver # noqa: F401, F811
from jsonschema.validators import RefResolver # noqa: F401
self.assertEqual(w.filename, __file__)

def test_RefResolutionError(self):
Expand Down
198 changes: 194 additions & 4 deletions jsonschema/tests/test_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,53 @@ def test_if_the_most_relevant_error_is_anyOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_anyOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an anyOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"anyOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_anyOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an anyOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.

I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"anyOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_anyOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an anyOf (as above) if all of its context errors
Expand All @@ -86,6 +133,53 @@ def test_no_anyOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_no_anyOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"anyOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_no_anyOf_traversal_for_two_items_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"anyOf": [
{
"type": "array",
"items": {
"$ref": "#/$defs/int_array",
},
},
{
"$ref": "#/$defs/int_array",
},
],
"$defs": {
"int_array": {
"type": "array",
"items": {
"type": "integer",
},
},
},
}
best = self.best_match_of(instance=["not an int", 0], schema=schema)
self.assertEqual(best.validator, "anyOf")
best = self.best_match_of(instance=[0, "not an int"], schema=schema)
self.assertEqual(best.validator, "anyOf")

def test_anyOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse anyOf with a single nested error, even though it is
Expand All @@ -100,7 +194,7 @@ def test_anyOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_anyOf_traversal_for_single_sibling_errors(self):
def test_anyOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse anyOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -111,8 +205,9 @@ def test_anyOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_anyOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down Expand Up @@ -152,6 +247,53 @@ def test_if_the_most_relevant_error_is_oneOf_it_is_traversed(self):
best = self.best_match_of(instance={"foo": {"bar": 12}}, schema=schema)
self.assertEqual(best.validator_value, "array")

def test_oneOf_traversal_for_single_shallower_errors_better_match(self):
"""
Traverse the context of an oneOf error with the only subschema,
and select the most relevant error.
"""

schema = {
"oneOf": [
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_oneOf_traversal_least_relevant_among_most_relevant_errors(self):
"""
Traverse the context of an oneOf error, and select
the *least* relevant error among the most relevant errors
in each separate subschema.

I.e. since only one of the schemas must match, we look for the most
specific one, and choose the most relevant error produced by it.
"""

schema = {
"oneOf": [
{"type": "string"},
{
"properties": {
"foo": {
"minProperties": 2,
"properties": {"bar": {"type": "object"}},
},
},
},
],
}
best = self.best_match_of(instance={"foo": {"bar": []}}, schema=schema)
self.assertEqual(best.validator, "minProperties")

def test_no_oneOf_traversal_for_equally_relevant_errors(self):
"""
We don't traverse into an oneOf (as above) if all of its context errors
Expand All @@ -168,6 +310,53 @@ def test_no_oneOf_traversal_for_equally_relevant_errors(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_no_oneOf_traversal_for_two_properties_sibling_errors(self):
"""
We don't traverse into an oneOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"oneOf": [
{"properties": {"foo": {"type": "string"}}},
{"properties": {"bar": {"type": "string"}}},
],
}
best = self.best_match_of(instance={"foo": 1, "bar": 1}, schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_no_oneOf_traversal_for_two_items_sibling_errors(self):
"""
We don't traverse into an anyOf if all of its context errors
seem to be equally "wrong" against the instance.
"""

schema = {
"oneOf": [
{
"type": "array",
"items": {
"$ref": "#/$defs/int_array",
},
},
{
"$ref": "#/$defs/int_array",
},
],
"$defs": {
"int_array": {
"type": "array",
"items": {
"type": "integer",
},
},
},
}
best = self.best_match_of(instance=["not an int", 0], schema=schema)
self.assertEqual(best.validator, "oneOf")
best = self.best_match_of(instance=[0, "not an int"], schema=schema)
self.assertEqual(best.validator, "oneOf")

def test_oneOf_traversal_for_single_equally_relevant_error(self):
"""
We *do* traverse oneOf with a single nested error, even though it is
Expand All @@ -182,7 +371,7 @@ def test_oneOf_traversal_for_single_equally_relevant_error(self):
best = self.best_match_of(instance=[], schema=schema)
self.assertEqual(best.validator, "type")

def test_oneOf_traversal_for_single_sibling_errors(self):
def test_oneOf_traversal_for_single_sibling_errors_choose_first(self):
"""
We *do* traverse oneOf with a single subschema that fails multiple
times (e.g. on multiple items).
Expand All @@ -193,8 +382,9 @@ def test_oneOf_traversal_for_single_sibling_errors(self):
{"items": {"const": 37}},
],
}
best = self.best_match_of(instance=[12, 12], schema=schema)
best = self.best_match_of(instance=[12, 13], schema=schema)
self.assertEqual(best.validator, "const")
self.assertEqual(best.instance, 12)

def test_oneOf_traversal_for_non_type_matching_sibling_errors(self):
"""
Expand Down