hed-standard · VisLab · Apr 25, 2023 · Apr 24, 2023
diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py
@@ -219,6 +219,13 @@ def schema_error_hed_duplicate_node(tag, duplicate_tag_list, section):
            f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
 
 
+@hed_error(SchemaErrors.HED_SCHEMA_DUPLICATE_FROM_LIBRARY)
+def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
+    tag_join_delimiter = "\n\t"  # each duplicate goes on its own tab-indented line; kept in a local for use inside the f-string
+    return f"Duplicate term '{str(tag)}' was found in the library and in the standard schema in '{section}' section schema as:" + \
+           f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
+
+
 @hed_error(SchemaErrors.HED_SCHEMA_ATTRIBUTE_INVALID)
 def schema_error_unknown_attribute(attribute_name, source_tag):
     return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
@@ -249,6 +256,13 @@ def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
            f"Found {invalid_attribute_name} on {tag_name}"
 
 
+@hed_error(SchemaWarnings.INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
+def schema_error_invalid_attribute(tag_name, invalid_attribute_name):
+    return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'. " \
+           "Something went very wrong."  # trailing space above keeps the two sentences from running together
+
+
+
 @hed_error(SidecarErrors.BLANK_HED_STRING)
 def sidecar_error_blank_hed_string():
     return "No HED string found for Value or Category column."

diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py
@@ -104,6 +104,7 @@ class SidecarErrors:
 class SchemaErrors:
     HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE'
     HED_SCHEMA_ATTRIBUTE_INVALID = 'HED_SCHEMA_ATTRIBUTE_INVALID'
+    HED_SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"
 
 
 class SchemaWarnings:
@@ -113,6 +114,7 @@ class SchemaWarnings:
     HED_SCHEMA_CHARACTER_INVALID = "HED_SCHEMA_CHARACTER_INVALID"
     INVALID_CAPITALIZATION = 'invalidCaps'
     NON_PLACEHOLDER_HAS_CLASS = 'NON_PLACEHOLDER_HAS_CLASS'
+    INVALID_ATTRIBUTE = "INVALID_ATTRIBUTE"
 
 
 class DefinitionErrors:

diff --git a/hed/errors/exceptions.py b/hed/errors/exceptions.py
@@ -14,7 +14,16 @@ class HedExceptions:
     # These are actual schema issues, not that the file cannot be found or parsed
     SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
     HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
-    BAD_HED_LIBRARY_NAME = 'badHedLibraryName'
+
+    SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
+    BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
+    BAD_WITH_STANDARD = "SCHEMA_LIBRARY_INVALID"
+    BAD_WITH_STANDARD_VERSION = "SCHEMA_LIBRARY_INVALID"
+    ROOTED_TAG_INVALID = "SCHEMA_LIBRARY_INVALID"
+    ROOTED_TAG_HAS_PARENT = "SCHEMA_LIBRARY_INVALID"
+    ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
+    IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"
+
     HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
     SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
     SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
@@ -31,14 +40,14 @@ class HedExceptions:
 
 class HedFileError(Exception):
     """Exception raised when a file cannot be parsed due to being malformed, file IO, etc."""
-    def __init__(self, error_type, message, filename, issues=None):
-        self.error_type = error_type
+    def __init__(self, code, message, filename, issues=None):
+        self.code = code
         self.message = message
         self.filename = filename
         self.issues = issues
         if self.issues is None:
             self.issues = [
                 {'message': message,
                  ErrorContext.FILE_NAME: filename,
-                 'error_code': error_type}
+                 'code': code}
             ]
diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py
@@ -1,3 +1,5 @@
+import os
+import shutil
 
 from hed.schema.hed_schema_constants import HedKey, HedSectionKey
 from hed.schema import hed_schema_constants as constants
@@ -6,7 +8,7 @@
 from hed.schema.schema_io.schema2wiki import HedSchema2Wiki
 
 from hed.schema import schema_validation_util
-from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection
+from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection
 from hed.errors import ErrorHandler
 from hed.errors.error_types import ValidationErrors
 
@@ -84,7 +86,7 @@ def merged(self):
 
 
         """
-        return self.header_attributes.get(constants.MERGED_ATTRIBUTE, "")
+        return not self.header_attributes.get(constants.UNMERGED_ATTRIBUTE, "")
 
     def get_save_header_attributes(self, save_merged=False):
         """ returns the attributes that should be saved.
@@ -93,11 +95,12 @@ def get_save_header_attributes(self, save_merged=False):
         sort_to_start = "!!!!!!!!!!!!!!"
         header_attributes = dict(sorted(self.header_attributes.items(), key=lambda x: sort_to_start if x[0] == constants.VERSION_ATTRIBUTE else x[0], reverse=False))
         if save_merged:
-            # make sure it's the last attribute(just to make sure it's in an order)
-            header_attributes.pop(constants.MERGED_ATTRIBUTE, None)
-            header_attributes[constants.MERGED_ATTRIBUTE] = "True"
+            header_attributes.pop(constants.UNMERGED_ATTRIBUTE, None)
         else:
-            header_attributes.pop(constants.MERGED_ATTRIBUTE, None)
+            # make sure it's the last attribute(just to make sure it's in an order)
+            header_attributes.pop(constants.UNMERGED_ATTRIBUTE, None)
+            header_attributes[constants.UNMERGED_ATTRIBUTE] = "True"
+
 
         return header_attributes
 
@@ -137,8 +140,8 @@ def get_as_mediawiki_string(self, save_merged=False):
         """ Return the schema to a mediawiki string.
 
         save_merged: bool
-            If true, this will save the schema as a merged schema if it is a "with-standard" schema.
-            If it is not a "with-standard" schema, this setting has no effect.
+            If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+            If it is not a "withStandard" schema, this setting has no effect.
         Returns:
             str:  The schema as a string in mediawiki format.
 
@@ -147,12 +150,12 @@ def get_as_mediawiki_string(self, save_merged=False):
         output_strings = schema2wiki.process_schema(self, save_merged)
         return '\n'.join(output_strings)
 
-    def get_as_xml_string(self, save_merged=False):
+    def get_as_xml_string(self, save_merged=True):
         """ Return the schema to an XML string.
 
         save_merged: bool
-            If true, this will save the schema as a merged schema if it is a "with-standard" schema.
-            If it is not a "with-standard" schema, this setting has no effect.
+            If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+            If it is not a "withStandard" schema, this setting has no effect.
         Returns:
             str: Return the schema as an XML string.
 
@@ -161,33 +164,50 @@ def get_as_xml_string(self, save_merged=False):
         xml_tree = schema2xml.process_schema(self, save_merged)
         return schema_util._xml_element_2_str(xml_tree)
 
-    def save_as_mediawiki(self, save_merged=False):
+    def save_as_mediawiki(self, filename=None, save_merged=False):
         """ Save as mediawiki to a temporary file.
 
+        filename: str
+            If present, move the resulting file to this location, creating missing parent directories.
         save_merged: bool
-            If true, this will save the schema as a merged schema if it is a "with-standard" schema.
-            If it is not a "with-standard" schema, this setting has no effect.
+            If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+            If it is not a "withStandard" schema, this setting has no effect.
+
         Returns:
             str:    The newly created schema filename.
-
         """
         schema2wiki = HedSchema2Wiki()
         output_strings = schema2wiki.process_schema(self, save_merged)
         local_wiki_file = schema_util.write_strings_to_file(output_strings, ".mediawiki")
+        if filename:
+            directory = os.path.dirname(filename)
+            if directory:  # empty when filename has no directory component
+                os.makedirs(directory, exist_ok=True)  # no check-then-create race; no-op if it already exists
+            shutil.move(local_wiki_file, filename)
+            return filename
         return local_wiki_file
 
-    def save_as_xml(self, save_merged=False):
+    def save_as_xml(self, filename=None, save_merged=True):
         """ Save as XML to a temporary file.
 
+        filename: str
+            If present, move the resulting file to this location, creating missing parent directories.
+        save_merged: bool
+            If true, this will save the schema as a merged schema if it is a "withStandard" schema.
+            If it is not a "withStandard" schema, this setting has no effect.
+
         Returns:
             str: The name of the newly created schema file.
-        save_merged: bool
-            If true, this will save the schema as a merged schema if it is a "with-standard" schema.
-            If it is not a "with-standard" schema, this setting has no effect.
         """
         schema2xml = HedSchema2XML()
         xml_tree = schema2xml.process_schema(self, save_merged)
         local_xml_file = schema_util.write_xml_tree_2_xml_file(xml_tree, ".xml")
+        if filename:
+            directory = os.path.dirname(filename)
+            if directory:  # empty when filename has no directory component
+                os.makedirs(directory, exist_ok=True)  # no check-then-create race; no-op if it already exists
+            shutil.move(local_xml_file, filename)
+            return filename
         return local_xml_file
 
     def set_schema_prefix(self, schema_prefix):
@@ -691,7 +711,7 @@ def _create_empty_sections():
         dictionaries[HedSectionKey.Attributes] = HedSchemaSection(HedSectionKey.Attributes)
         dictionaries[HedSectionKey.UnitModifiers] = HedSchemaSection(HedSectionKey.UnitModifiers)
         dictionaries[HedSectionKey.Units] = HedSchemaSection(HedSectionKey.Units)
-        dictionaries[HedSectionKey.UnitClasses] = HedSchemaSection(HedSectionKey.UnitClasses)
+        dictionaries[HedSectionKey.UnitClasses] = HedSchemaUnitClassSection(HedSectionKey.UnitClasses)
         dictionaries[HedSectionKey.ValueClasses] = HedSchemaSection(HedSectionKey.ValueClasses)
         dictionaries[HedSectionKey.AllTags] = HedSchemaTagSection(HedSectionKey.AllTags, case_sensitive=False)
 
@@ -768,6 +788,14 @@ def _get_attributes_for_section(self, key_class):
     # ===============================================
     # Semi private function used to create a schema in memory(usually from a source file)
     # ===============================================
-    def _add_tag_to_dict(self, long_tag_name, key_class):
+    def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
+        # No reason we can't add this here always
+        if self.library and not self.merged:
+            new_entry.set_attribute_value(HedKey.InLibrary, self.library)
+
+        section = self._sections[key_class]
+        return section._add_to_dict(long_tag_name, new_entry)
+
+    def _create_tag_entry(self, long_tag_name, key_class):
         section = self._sections[key_class]
-        return section._add_to_dict(long_tag_name)
+        return section._create_tag_entry(long_tag_name)
diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py
@@ -39,6 +39,7 @@ class HedKey:
     ValueClass = "valueClass"
     RelatedTag = "relatedTag"
     SuggestedTag = "suggestedTag"
+    Rooted = "rooted"
 
     # All known properties
     BoolProperty = 'boolProperty'
@@ -66,5 +67,5 @@ class HedKey:
 
 VERSION_ATTRIBUTE = 'version'
 LIBRARY_ATTRIBUTE = 'library'
-WITH_STANDARD_ATTRIBUTE = "with-standard"
-MERGED_ATTRIBUTE = "merged"
+WITH_STANDARD_ATTRIBUTE = "withStandard"
+UNMERGED_ATTRIBUTE = "unmerged"
diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py
@@ -139,7 +139,6 @@ def __init__(self, *args, **kwargs):
         self._units = []
         self.units = []
         self.derivative_units = []
-        self.unit_class_entry = None
 
     def add_unit(self, unit_entry):
         """ Add the given unit entry to this unit class.
@@ -170,6 +169,12 @@ def finalize_entry(self, schema):
                     derivative_units[modifier.name + derived_unit] = unit_entry
         self.derivative_units = derivative_units
 
+    def __eq__(self, other):
+        if not super().__eq__(other):
+            return False
+        if self.units != other.units:
+            return False
+        return True
 
 class UnitEntry(HedSchemaEntry):
     """ A single unit entry with modifiers in the HedSchema. """
@@ -188,7 +193,6 @@ def finalize_entry(self, schema):
         """
         self.unit_modifiers = schema.get_modifiers_for_unit(self.name)
 
-
 class HedTagEntry(HedSchemaEntry):
     """ A single tag entry in the HedSchema. """
     def __init__(self, *args, **kwargs):
@@ -202,36 +206,6 @@ def __init__(self, *args, **kwargs):
         self._parent_tag = None
         self.tag_terms = tuple()
 
-    @staticmethod
-    def get_fake_tag_entry(tag, tags_to_identify):
-        """ Create a tag entry if a given a tag has a match in a list of possible short tags.
-
-        Parameters:
-            tag (str): The short/mid/long form tag to identify.
-            tags_to_identify (list): A list of lowercase short tags to identify.
-
-        Returns:
-            tuple:
-                - HedTagEntry or None: The fake entry showing the short tag name as the found tag.
-                - str: The remaining text after the located short tag, which may be empty.
-
-        Notes:
-             - The match is done left to right.
-
-        """
-        split_names = tag.split("/")
-        index = 0
-        for name in split_names:
-            if name.lower() in tags_to_identify:
-                fake_entry = HedTagEntry(name=tag[:index + len(name)], section=None)
-                fake_entry.long_tag_name = fake_entry.name
-                fake_entry.short_tag_name = name
-                return fake_entry, tag[index + len(name):]
-
-            index += len(name) + 1
-
-        return None, ""
-
     def any_parent_has_attribute(self, attribute):
         """ Check if tag (or parents) has the attribute.
 
@@ -271,6 +245,19 @@ def base_tag_has_attribute(self, tag_attribute):
 
         return base_entry.has_attribute(tag_attribute)
 
+    @property
+    def parent(self):
+        """Get the parent entry of this tag"""
+        return self._parent_tag
+
+    @property
+    def parent_name(self):
+        """Return the parent entry's name, or this name minus its last "/" term if no parent is set."""
+        if self._parent_tag:
+            return self._parent_tag.name
+        # No linked parent entry: derive it from the name ("" when the name contains no "/").
+        return self.name.rpartition("/")[0]
+
     def finalize_entry(self, schema):
         """ Called once after schema loading to set state.
 

diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py
@@ -152,7 +152,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
             final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
         hed_schema = load_schema(final_hed_xml_file)
     except HedFileError as e:
-        if e.error_type == HedExceptions.FILE_NOT_FOUND:
+        if e.code == HedExceptions.FILE_NOT_FOUND:
             hed_cache.cache_xml_versions(cache_folder=xml_folder)
             final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
             if not final_hed_xml_file: