Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Clean up some of the overly complex functions, add a few unit tests #705

Merged
merged 1 commit into from
Jun 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions hed/models/hed_tag.py
Original file line number Diff line number Diff line change
Expand Up @@ -602,10 +602,14 @@ def _get_tag_units_portion(self, tag_unit_classes):
@staticmethod
def _find_modifier_unit_entry(units, all_valid_unit_permutations):
possible_match = all_valid_unit_permutations.get(units)
if not possible_match or not possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = all_valid_unit_permutations.get(units.lower())
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = None
# If we have a match that's a unit symbol, we're done, return it.
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
return possible_match

possible_match = all_valid_unit_permutations.get(units.lower())
# Unit symbols must match including case, a match of a unit symbol now is something like M becoming m.
if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
possible_match = None

return possible_match

Expand Down
68 changes: 45 additions & 23 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,7 +509,6 @@ def _find_tag_entry(self, tag, schema_namespace=""):
clean_tag = str(tag)
namespace = schema_namespace
clean_tag = clean_tag[len(namespace):]
prefix_tag_adj = len(namespace)
working_tag = clean_tag.lower()

# Most tags are in the schema directly, so test that first
Expand All @@ -523,9 +522,26 @@ def _find_tag_entry(self, tag, schema_namespace=""):

return found_entry, remainder, []

prefix_tag_adj = len(namespace)

try:
found_entry, current_slash_index = self._find_tag_subfunction(tag, working_tag, prefix_tag_adj)
except self._TagIdentifyError as e:
issue = e.issue
return None, None, issue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and found_entry.takes_value_child_entry:
found_entry = found_entry.takes_value_child_entry

return found_entry, remainder, []

def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
"""Finds the base tag and remainder from the left, raising exception on issues"""
current_slash_index = -1
current_entry = None

# Loop left to right, checking each word. Once we find an invalid word, we stop.
while True:
next_index = working_tag.find("/", current_slash_index + 1)
Expand All @@ -541,36 +557,37 @@ def _find_tag_entry(self, tag, schema_namespace=""):
tag,
index_in_tag=prefix_tag_adj,
index_in_tag_end=prefix_tag_adj + next_index)
return None, None, error
raise self._TagIdentifyError(error)
# If this is not a takes value node, validate each term in the remainder.
if not current_entry.takes_value_child_entry:
child_names = working_tag[current_slash_index + 1:].split("/")
word_start_index = current_slash_index + 1 + prefix_tag_adj
for name in child_names:
if self._get_tag_entry(name):
error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
tag,
index_in_tag=word_start_index,
index_in_tag_end=word_start_index + len(name),
expected_parent_tag=self.all_tags[name].name)
return None, None, error
word_start_index += len(name) + 1
# This will raise _TagIdentifyError on any issues
self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
break

current_entry = parent_entry
current_slash_index = next_index
if next_index == len(working_tag):
break
continue

remainder = None
if current_slash_index != -1:
remainder = clean_tag[current_slash_index:]
if remainder and current_entry.takes_value_child_entry:
current_entry = current_entry.takes_value_child_entry
found_entry = current_entry

return found_entry, remainder, []
return current_entry, current_slash_index

def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
""" Validates the terms past current_slash_index.

:raises _TagIdentifyError:
- One of the extension terms already exists as a schema term.
"""
child_names = working_tag[current_slash_index + 1:].split("/")
word_start_index = current_slash_index + 1 + prefix_tag_adj
for name in child_names:
if self._get_tag_entry(name):
error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
tag,
index_in_tag=word_start_index,
index_in_tag_end=word_start_index + len(name),
expected_parent_tag=self.all_tags[name].name)
raise self._TagIdentifyError(error)
word_start_index += len(name) + 1

# ===============================================
# Semi-private creation finalizing functions
Expand Down Expand Up @@ -801,3 +818,8 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
def _create_tag_entry(self, long_tag_name, key_class):
section = self._sections[key_class]
return section._create_tag_entry(long_tag_name)

class _TagIdentifyError(Exception):
"""Used internally to note when a tag cannot be identified."""
def __init__(self, issue):
self.issue = issue
81 changes: 81 additions & 0 deletions hed/schema/schema_attribute_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
"""The built-in functions to validate known attributes.

Template for the functions:
    attribute_checker_template(hed_schema, tag_entry, attribute_name):
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute
    Returns:
        list: A list of issues. Each issue is a dictionary.
"""

from hed.errors.error_types import SchemaWarnings, ValidationErrors
from hed.errors.error_reporter import ErrorHandler
from hed.schema.hed_schema import HedSchema


def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
    """ Check that the tag holding this attribute is a placeholder ("/#") tag.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation (not read by this check).
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.

    """
    # Placeholder entries are recognised purely by their name suffix.
    if tag_entry.name.endswith("/#"):
        return []

    return list(ErrorHandler.format_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
                                          attribute_name))


def tag_exists_check(hed_schema, tag_entry, attribute_name):
    """ Check that every tag in a comma separated attribute value is in the schema.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.

    """
    attribute_value = tag_entry.attributes.get(attribute_name, "")
    issues = []
    for candidate in attribute_value.split(","):
        # Empty pieces (missing attribute, doubled commas) are ignored.
        if not candidate or candidate in hed_schema.all_tags:
            continue
        issues += ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                            candidate,
                                            index_in_tag=0,
                                            index_in_tag_end=len(candidate))

    return issues


def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name):
    """ Check that the single tag named by this attribute exists in the schema.

    Parameters:
        hed_schema (HedSchema): The schema to use for validation
        tag_entry (HedSchemaEntry): The schema entry for this tag.
        attribute_name (str): The name of this attribute

    Returns:
        list: A list of issues. Each issue is a dictionary.
    """
    rooted_tag = tag_entry.attributes.get(attribute_name, "")
    # Nothing to report when the attribute is unset or names a known tag.
    if not rooted_tag or rooted_tag in hed_schema.all_tags:
        return []

    return list(ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
                                          rooted_tag,
                                          index_in_tag=0,
                                          index_in_tag_end=len(rooted_tag)))
Loading