Skip to content

Commit

Permalink
Merge pull request #705 from IanCa/develop
Browse files Browse the repository at this point in the history
Clean up some of the overly complex functions some, add a few unit tests
  • Loading branch information
VisLab committed Jun 22, 2023
2 parents 4f08dd2 + 4ae9b4a commit db04a14
Show file tree
Hide file tree
Showing 13 changed files with 515 additions and 381 deletions.
12 changes: 8 additions & 4 deletions hed/models/hed_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,14 @@ def _get_tag_units_portion(self, tag_unit_classes):
@staticmethod
def _find_modifier_unit_entry(units, all_valid_unit_permutations):
    """ Look up the entry for a unit string, treating unit symbols as case sensitive.

    The exact text is tried first.  If that does not yield a unit symbol, the
    lowercased text is looked up instead, and a unit symbol found that way is
    rejected because its case did not match what was written.

    Parameters:
        units (str): The unit text to look up.
        all_valid_unit_permutations (dict): Maps unit strings to unit entries.
            Only ``get()`` is called on it; entries must provide ``has_attribute()``.

    Returns:
        The matching unit entry, or None if no acceptable match was found.
    """
    exact_match = all_valid_unit_permutations.get(units)
    # If we have a match that's a unit symbol, we're done, return it.
    if exact_match and exact_match.has_attribute(HedKey.UnitSymbol):
        return exact_match

    # Single lowercased lookup; the earlier duplicate of this lookup was redundant.
    lowered_match = all_valid_unit_permutations.get(units.lower())
    # Unit symbols must match including case, a match of a unit symbol now is something like M becoming m.
    if lowered_match and lowered_match.has_attribute(HedKey.UnitSymbol):
        return None

    return lowered_match

Expand Down
68 changes: 45 additions & 23 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,6 @@ def _find_tag_entry(self, tag, schema_namespace=""):
clean_tag = str(tag)
namespace = schema_namespace
clean_tag = clean_tag[len(namespace):]
prefix_tag_adj = len(namespace)
working_tag = clean_tag.lower()

# Most tags are in the schema directly, so test that first
Expand All @@ -523,9 +522,26 @@ def _find_tag_entry(self, tag, schema_namespace=""):

return found_entry, remainder, []

prefix_tag_adj = len(namespace)

try:
found_entry, current_slash_index = self._find_tag_subfunction(tag, working_tag, prefix_tag_adj)
except self._TagIdentifyError as e:
issue = e.issue
return None, None, issue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and found_entry.takes_value_child_entry:
found_entry = found_entry.takes_value_child_entry

return found_entry, remainder, []

def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
"""Finds the base tag and remainder from the left, raising exception on issues"""
current_slash_index = -1
current_entry = None

# Loop left to right, checking each word. Once we find an invalid word, we stop.
while True:
next_index = working_tag.find("/", current_slash_index + 1)
Expand All @@ -541,36 +557,37 @@ def _find_tag_entry(self, tag, schema_namespace=""):
tag,
index_in_tag=prefix_tag_adj,
index_in_tag_end=prefix_tag_adj + next_index)
return None, None, error
raise self._TagIdentifyError(error)
# If this is not a takes value node, validate each term in the remainder.
if not current_entry.takes_value_child_entry:
child_names = working_tag[current_slash_index + 1:].split("/")
word_start_index = current_slash_index + 1 + prefix_tag_adj
for name in child_names:
if self._get_tag_entry(name):
error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
tag,
index_in_tag=word_start_index,
index_in_tag_end=word_start_index + len(name),
expected_parent_tag=self.all_tags[name].name)
return None, None, error
word_start_index += len(name) + 1
# This will raise _TagIdentifyError on any issues
self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
break

current_entry = parent_entry
current_slash_index = next_index
if next_index == len(working_tag):
break
continue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and current_entry.takes_value_child_entry:
current_entry = current_entry.takes_value_child_entry
found_entry = current_entry

return found_entry, remainder, []
return current_entry, current_slash_index

def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
    """ Check each term after current_slash_index against the schema.

    Parameters:
        tag: The original tag, passed through to the error formatter.
        working_tag (str): The tag text whose terms are checked; it is split on "/".
        prefix_tag_adj (int): Offset added to positions in working_tag when reporting an error.
        current_slash_index (int): Index in working_tag of the slash preceding the first term to check.

    :raises _TagIdentifyError:
        - One of the extension terms already exists as a schema term.
    """
    term_start = prefix_tag_adj + current_slash_index + 1
    for term in working_tag[current_slash_index + 1:].split("/"):
        term_end = term_start + len(term)
        if self._get_tag_entry(term):
            issue = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
                                              tag,
                                              index_in_tag=term_start,
                                              index_in_tag_end=term_end,
                                              expected_parent_tag=self.all_tags[term].name)
            raise self._TagIdentifyError(issue)
        # Step over this term and the slash that follows it.
        term_start = term_end + 1

# ===============================================
# Semi-private creation finalizing functions
Expand Down Expand Up @@ -801,3 +818,8 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
def _create_tag_entry(self, long_tag_name, key_class):
    """ Create an entry for long_tag_name via the section stored under key_class.

    Parameters:
        long_tag_name: The tag name handed to the section's own _create_tag_entry.
        key_class: The key used to select the section from self._sections.

    Returns:
        Whatever the selected section's _create_tag_entry returns.
    """
    return self._sections[key_class]._create_tag_entry(long_tag_name)

class _TagIdentifyError(Exception):
    """Used internally to note when a tag cannot be identified.

    The issue given to the constructor is kept on the ``issue`` attribute so the
    code catching the exception can hand it back to its caller.
    """

    def __init__(self, issue):
        # Explicit base-class call; the exception args stay (issue,) as before.
        super().__init__(issue)
        self.issue = issue
81 changes: 81 additions & 0 deletions hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""The built-in functions to validate known attributes.
Template for the functions:
    attribute_checker_template(hed_schema, tag_entry, attribute_name):
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute
    Returns:
        list: A list of issues. Each issue is a dictionary.
"""

from hed.errors.error_types import SchemaWarnings, ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_schema import HedSchema


def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
    """ Check that the tag carrying this attribute is a placeholder (its name ends with "/#").

    Parameters:
        hed_schema (HedSchema): The schema to use for validation (not read by this check).
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    issues = []
    # Only placeholder nodes pass; anything else is reported against the attribute.
    if not tag_entry.name.endswith("/#"):
        issues += ErrorHandler.format_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
                                            attribute_name)

    return issues


def tag_exists_check(hed_schema, tag_entry, attribute_name):
    """ Check that every tag in the attribute's comma separated value exists in the schema.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    found_issues = []
    raw_value = tag_entry.attributes.get(attribute_name, "")
    for candidate in raw_value.split(","):
        # Empty pieces and tags present in the schema need no report.
        if not candidate or candidate in hed_schema.all_tags:
            continue
        found_issues += ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                                  candidate,
                                                  index_in_tag=0,
                                                  index_in_tag_end=len(candidate))

    return found_issues


def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name):
    """ Check that the single tag named by this attribute exists in the schema.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    base_tag = tag_entry.attributes.get(attribute_name, "")
    # Nothing to report when the attribute is unset/empty or the tag is known.
    if not base_tag or base_tag in hed_schema.all_tags:
        return []

    problems = []
    problems += ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                          base_tag,
                                          index_in_tag=0,
                                          index_in_tag_end=len(base_tag))
    return problems
Loading

0 comments on commit db04a14

Please sign in to comment.