Merge pull request #705 from IanCa/develop

Clean up some of the overly complex functions and add a few unit tests
hed-standard · Jun 22, 2023 · db04a14 · db04a14
2 parents 4f08dd2 + 4ae9b4a
commit db04a14
Show file tree

Hide file tree

Showing 13 changed files with 515 additions and 381 deletions.
diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py
@@ -602,10 +602,14 @@ def _get_tag_units_portion(self, tag_unit_classes):
     @staticmethod
     def _find_modifier_unit_entry(units, all_valid_unit_permutations):
         possible_match = all_valid_unit_permutations.get(units)
-        if not possible_match or not possible_match.has_attribute(HedKey.UnitSymbol):
-            possible_match = all_valid_unit_permutations.get(units.lower())
-            if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
-                possible_match = None
+        # If we have a match that's a unit symbol, we're done, return it.
+        if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
+            return possible_match
+
+        possible_match = all_valid_unit_permutations.get(units.lower())
+        # Unit symbols are case-sensitive: a unit-symbol match here came only from lowercasing (e.g. M becoming m).
+        if possible_match and possible_match.has_attribute(HedKey.UnitSymbol):
+            possible_match = None
 
         return possible_match
 

diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
@@ -509,7 +509,6 @@ def _find_tag_entry(self, tag, schema_namespace=""):
         clean_tag = str(tag)
         namespace = schema_namespace
         clean_tag = clean_tag[len(namespace):]
-        prefix_tag_adj = len(namespace)
         working_tag = clean_tag.lower()
 
         # Most tags are in the schema directly, so test that first
@@ -523,9 +522,26 @@ def _find_tag_entry(self, tag, schema_namespace=""):
 
             return found_entry, remainder, []
 
+        prefix_tag_adj = len(namespace)
+
+        try:
+            found_entry, current_slash_index = self._find_tag_subfunction(tag, working_tag, prefix_tag_adj)
+        except self._TagIdentifyError as e:
+            issue = e.issue
+            return None, None, issue
+
+        remainder = None
+        if current_slash_index != -1:
+            remainder = clean_tag[current_slash_index:]
+        if remainder and found_entry.takes_value_child_entry:
+            found_entry = found_entry.takes_value_child_entry
+
+        return found_entry, remainder, []
+
+    def _find_tag_subfunction(self, tag, working_tag, prefix_tag_adj):
+        """Finds the base tag and remainder from the left, raising exception on issues"""
         current_slash_index = -1
         current_entry = None
-
         # Loop left to right, checking each word.  Once we find an invalid word, we stop.
         while True:
             next_index = working_tag.find("/", current_slash_index + 1)
@@ -541,36 +557,37 @@ def _find_tag_entry(self, tag, schema_namespace=""):
                                                       tag,
                                                       index_in_tag=prefix_tag_adj,
                                                       index_in_tag_end=prefix_tag_adj + next_index)
-                    return None, None, error
+                    raise self._TagIdentifyError(error)
                 # If this is not a takes value node, validate each term in the remainder.
                 if not current_entry.takes_value_child_entry:
-                    child_names = working_tag[current_slash_index + 1:].split("/")
-                    word_start_index = current_slash_index + 1 + prefix_tag_adj
-                    for name in child_names:
-                        if self._get_tag_entry(name):
-                            error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
-                                                              tag,
-                                                              index_in_tag=word_start_index,
-                                                              index_in_tag_end=word_start_index + len(name),
-                                                              expected_parent_tag=self.all_tags[name].name)
-                            return None, None, error
-                        word_start_index += len(name) + 1
+                    # This will raise _TagIdentifyError on any issues
+                    self._validate_remaining_terms(tag, working_tag, prefix_tag_adj, current_slash_index)
                 break
 
             current_entry = parent_entry
             current_slash_index = next_index
             if next_index == len(working_tag):
                 break
-            continue
-
-        remainder = None
-        if current_slash_index != -1:
-            remainder = clean_tag[current_slash_index:]
-        if remainder and current_entry.takes_value_child_entry:
-            current_entry = current_entry.takes_value_child_entry
-        found_entry = current_entry
 
-        return found_entry, remainder, []
+        return current_entry, current_slash_index
+
+    def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_slash_index):
+        """ Validates the terms past current_slash_index.
+        
+        :raises _TagIdentifyError:
+            - One of the extension terms already exists as a schema term.
+        """
+        child_names = working_tag[current_slash_index + 1:].split("/")
+        word_start_index = current_slash_index + 1 + prefix_tag_adj
+        for name in child_names:
+            if self._get_tag_entry(name):
+                error = ErrorHandler.format_error(ValidationErrors.INVALID_PARENT_NODE,
+                                                  tag,
+                                                  index_in_tag=word_start_index,
+                                                  index_in_tag_end=word_start_index + len(name),
+                                                  expected_parent_tag=self.all_tags[name].name)
+                raise self._TagIdentifyError(error)
+            word_start_index += len(name) + 1
 
     # ===============================================
     # Semi-private creation finalizing functions
@@ -801,3 +818,8 @@ def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
     def _create_tag_entry(self, long_tag_name, key_class):
         section = self._sections[key_class]
         return section._create_tag_entry(long_tag_name)
+
+    class _TagIdentifyError(Exception):
+        """Used internally to note when a tag cannot be identified."""
+        def __init__(self, issue):
+            self.issue = issue
diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py
@@ -0,0 +1,81 @@
+"""The built-in functions to validate known attributes.
+
+Template for the functions:
+attribute_checker_template(hed_schema, tag_entry, attribute_name):
+    hed_schema (HedSchema): The schema to use for validation
+    tag_entry (HedSchemaEntry): The schema entry for this tag.
+    attribute_name (str): The name of this attribute
+Returns:
+    list: A list of issues. Each issue is a dictionary.
+"""
+
+from hed.errors.error_types import SchemaWarnings, ValidationErrors
+from hed.errors.error_reporter import ErrorHandler
+from hed.schema.hed_schema import HedSchema
+
+
+def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name):
+    """ Check that a tag entry with this attribute is a placeholder (its name ends with '/#').
+
+    Parameters:
+        hed_schema (HedSchema): The schema to use for validation
+        tag_entry (HedSchemaEntry): The schema entry for this tag.
+        attribute_name (str): The name of this attribute
+
+    Returns:
+        list: A list of issues. Each issue is a dictionary.
+
+    """
+    issues = []
+    if not tag_entry.name.endswith("/#"):
+        issues += ErrorHandler.format_error(SchemaWarnings.NON_PLACEHOLDER_HAS_CLASS, tag_entry.name,
+                                            attribute_name)
+
+    return issues
+
+
+def tag_exists_check(hed_schema, tag_entry, attribute_name):
+    """ Check if the list of possible tags exists in the schema.
+
+    Parameters:
+        hed_schema (HedSchema): The schema to use for validation
+        tag_entry (HedSchemaEntry): The schema entry for this tag.
+        attribute_name (str): The name of this attribute
+
+    Returns:
+        list: A list of issues. Each issue is a dictionary.
+
+    """
+    issues = []
+    possible_tags = tag_entry.attributes.get(attribute_name, "")
+    split_tags = possible_tags.split(",")
+    for org_tag in split_tags:
+        if org_tag and org_tag not in hed_schema.all_tags:
+            issues += ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
+                                                org_tag,
+                                                index_in_tag=0,
+                                                index_in_tag_end=len(org_tag))
+
+    return issues
+
+
+def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name):
+    """ Check if the single tag is a partnered schema tag
+
+    Parameters:
+        hed_schema (HedSchema): The schema to use for validation
+        tag_entry (HedSchemaEntry): The schema entry for this tag.
+        attribute_name (str): The name of this attribute
+
+    Returns:
+        list: A list of issues. Each issue is a dictionary.
+    """
+    issues = []
+    rooted_tag = tag_entry.attributes.get(attribute_name, "")
+    if rooted_tag and rooted_tag not in hed_schema.all_tags:
+        issues += ErrorHandler.format_error(ValidationErrors.NO_VALID_TAG_FOUND,
+                                            rooted_tag,
+                                            index_in_tag=0,
+                                            index_in_tag_end=len(rooted_tag))
+
+    return issues