Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update merged schemas to support rooted tags #654

Merged
merged 1 commit into from
Apr 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions hed/errors/error_messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,13 @@ def schema_error_hed_duplicate_node(tag, duplicate_tag_list, section):
f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"


@hed_error(SchemaErrors.HED_SCHEMA_DUPLICATE_FROM_LIBRARY)
def schema_error_hed_duplicate_from_library(tag, duplicate_tag_list, section):
    """ Build the message for a term found in both the library and the standard schema.

    This function was previously also named schema_error_hed_duplicate_node, which
    rebound the module-level name of the HED_SCHEMA_DUPLICATE_NODE message function
    defined just above it.  It now has its own name so both stay reachable by name.

    Parameters:
        tag: The duplicated term; it is converted with str() for display.
        duplicate_tag_list (list of str): The duplicate tag strings, listed one per line in the message.
        section: The schema section in which the duplicate was found.

    Returns:
        str: The formatted error message.

    """
    tag_join_delimiter = "\n\t"
    duplicate_listing = f"{tag_join_delimiter}{tag_join_delimiter.join(duplicate_tag_list)}"
    return (
        f"Duplicate term '{str(tag)}' was found in the library and in the standard schema "
        f"in '{section}' section schema as:" + duplicate_listing
    )


@hed_error(SchemaErrors.HED_SCHEMA_ATTRIBUTE_INVALID)
def schema_error_unknown_attribute(attribute_name, source_tag):
return f"Attribute '{attribute_name}' used by '{source_tag}' was not defined in the schema, " \
Expand Down Expand Up @@ -249,6 +256,13 @@ def schema_warning_non_placeholder_class(tag_name, invalid_attribute_name):
f"Found {invalid_attribute_name} on {tag_name}"


@hed_error(SchemaWarnings.INVALID_ATTRIBUTE, default_severity=ErrorSeverity.ERROR)
def schema_error_invalid_attribute(tag_name, invalid_attribute_name):
    """ Build the message for an attribute that must not appear in a loaded schema.

    Parameters:
        tag_name: The name of the tag the attribute was found on.
        invalid_attribute_name: The name of the attribute that should not be present.

    Returns:
        str: The formatted error message.

    """
    # The trailing space keeps the two sentences from running together
    # ("...'.Something went very wrong." in the previous version).
    return f"'{invalid_attribute_name}' should not be present in a loaded schema, found on '{tag_name}'. " \
           "Something went very wrong."



@hed_error(SidecarErrors.BLANK_HED_STRING)
def sidecar_error_blank_hed_string():
    """ Return the fixed message text for the SidecarErrors.BLANK_HED_STRING error.

    Returns:
        str: The error message; it takes no parameters because the text is constant.

    """
    return "No HED string found for Value or Category column."
Expand Down
2 changes: 2 additions & 0 deletions hed/errors/error_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class SidecarErrors:
class SchemaErrors:
    """ Error-code constants used with @hed_error for schema error messages. """
    HED_SCHEMA_DUPLICATE_NODE = 'HED_SCHEMA_DUPLICATE_NODE'
    HED_SCHEMA_ATTRIBUTE_INVALID = 'HED_SCHEMA_ATTRIBUTE_INVALID'
    # Unlike the members above, the constant name and its code string differ here:
    # a term duplicated between a library and the standard schema is reported
    # under the "SCHEMA_LIBRARY_INVALID" code string.
    HED_SCHEMA_DUPLICATE_FROM_LIBRARY = "SCHEMA_LIBRARY_INVALID"


class SchemaWarnings:
Expand All @@ -113,6 +114,7 @@ class SchemaWarnings:
HED_SCHEMA_CHARACTER_INVALID = "HED_SCHEMA_CHARACTER_INVALID"
INVALID_CAPITALIZATION = 'invalidCaps'
NON_PLACEHOLDER_HAS_CLASS = 'NON_PLACEHOLDER_HAS_CLASS'
INVALID_ATTRIBUTE = "INVALID_ATTRIBUTE"


class DefinitionErrors:
Expand Down
17 changes: 13 additions & 4 deletions hed/errors/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,16 @@ class HedExceptions:
# These are actual schema issues, not that the file cannot be found or parsed
SCHEMA_HEADER_MISSING = 'HED_SCHEMA_HEADER_INVALID'
HED_SCHEMA_HEADER_INVALID = 'HED_SCHEMA_HEADER_INVALID'
BAD_HED_LIBRARY_NAME = 'badHedLibraryName'

SCHEMA_LIBRARY_INVALID = "SCHEMA_LIBRARY_INVALID"
BAD_HED_LIBRARY_NAME = 'SCHEMA_LIBRARY_INVALID'
BAD_WITH_STANDARD = "SCHEMA_LIBRARY_INVALID"
BAD_WITH_STANDARD_VERSION = "SCHEMA_LIBRARY_INVALID"
ROOTED_TAG_INVALID = "SCHEMA_LIBRARY_INVALID"
ROOTED_TAG_HAS_PARENT = "SCHEMA_LIBRARY_INVALID"
ROOTED_TAG_DOES_NOT_EXIST = "SCHEMA_LIBRARY_INVALID"
IN_LIBRARY_IN_UNMERGED = "SCHEMA_LIBRARY_INVALID"

HED_SCHEMA_VERSION_INVALID = 'HED_SCHEMA_VERSION_INVALID'
SCHEMA_START_MISSING = 'HED_WIKI_SEPARATOR_INVALID'
SCHEMA_END_INVALID = 'HED_WIKI_SEPARATOR_INVALID'
Expand All @@ -31,14 +40,14 @@ class HedExceptions:

class HedFileError(Exception):
    """Exception raised when a file cannot be parsed due to being malformed, file IO, etc.

    Attributes:
        code: The error code passed to the constructor.
        message: The human-readable description of the problem.
        filename: The name of the file the error refers to.
        issues (list): The issues supplied by the caller, or a single-issue list
            built from message, filename and code when none were supplied.
        error_type: Read-only alias of ``code``, kept for callers written
            against the attribute's previous name.
    """
    def __init__(self, code, message, filename, issues=None):
        self.code = code
        self.message = message
        self.filename = filename
        self.issues = issues
        # Only None triggers the default; an explicitly passed empty list is kept as-is.
        if self.issues is None:
            self.issues = [
                {'message': message,
                 ErrorContext.FILE_NAME: filename,
                 'code': code}
            ]

    @property
    def error_type(self):
        """Return ``code``; this attribute was called ``error_type`` before the rename."""
        return self.code
72 changes: 50 additions & 22 deletions hed/schema/hed_schema.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import os
import shutil

from hed.schema.hed_schema_constants import HedKey, HedSectionKey
from hed.schema import hed_schema_constants as constants
Expand All @@ -6,7 +8,7 @@
from hed.schema.schema_io.schema2wiki import HedSchema2Wiki

from hed.schema import schema_validation_util
from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection
from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection
from hed.errors import ErrorHandler
from hed.errors.error_types import ValidationErrors

Expand Down Expand Up @@ -84,7 +86,7 @@ def merged(self):


"""
return self.header_attributes.get(constants.MERGED_ATTRIBUTE, "")
return not self.header_attributes.get(constants.UNMERGED_ATTRIBUTE, "")

def get_save_header_attributes(self, save_merged=False):
""" returns the attributes that should be saved.
Expand All @@ -93,11 +95,12 @@ def get_save_header_attributes(self, save_merged=False):
sort_to_start = "!!!!!!!!!!!!!!"
header_attributes = dict(sorted(self.header_attributes.items(), key=lambda x: sort_to_start if x[0] == constants.VERSION_ATTRIBUTE else x[0], reverse=False))
if save_merged:
# make sure it's the last attribute(just to make sure it's in an order)
header_attributes.pop(constants.MERGED_ATTRIBUTE, None)
header_attributes[constants.MERGED_ATTRIBUTE] = "True"
header_attributes.pop(constants.UNMERGED_ATTRIBUTE, None)
else:
header_attributes.pop(constants.MERGED_ATTRIBUTE, None)
# make sure it's the last attribute(just to make sure it's in an order)
header_attributes.pop(constants.UNMERGED_ATTRIBUTE, None)
header_attributes[constants.UNMERGED_ATTRIBUTE] = "True"


return header_attributes

Expand Down Expand Up @@ -137,8 +140,8 @@ def get_as_mediawiki_string(self, save_merged=False):
""" Return the schema to a mediawiki string.

save_merged: bool
If true, this will save the schema as a merged schema if it is a "with-standard" schema.
If it is not a "with-standard" schema, this setting has no effect.
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Returns:
str: The schema as a string in mediawiki format.

Expand All @@ -147,12 +150,12 @@ def get_as_mediawiki_string(self, save_merged=False):
output_strings = schema2wiki.process_schema(self, save_merged)
return '\n'.join(output_strings)

def get_as_xml_string(self, save_merged=False):
def get_as_xml_string(self, save_merged=True):
""" Return the schema to an XML string.

save_merged: bool
If true, this will save the schema as a merged schema if it is a "with-standard" schema.
If it is not a "with-standard" schema, this setting has no effect.
If true, this will save the schema as a merged schema if it is a "withStandard" schema.
If it is not a "withStandard" schema, this setting has no effect.
Returns:
str: Return the schema as an XML string.

Expand All @@ -161,33 +164,50 @@ def get_as_xml_string(self, save_merged=False):
xml_tree = schema2xml.process_schema(self, save_merged)
return schema_util._xml_element_2_str(xml_tree)

def save_as_mediawiki(self, filename=None, save_merged=False):
    """ Save the schema in mediawiki format and return the path of the saved file.

    The schema is first written out through schema_util.write_strings_to_file;
    when filename is given, the written file is then moved to that location.

    Parameters:
        filename (str or None): If present, move the resulting file to this location.
            Missing parent directories are created.
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The newly created schema filename (filename itself when one was given).

    """
    schema2wiki = HedSchema2Wiki()
    output_strings = schema2wiki.process_schema(self, save_merged)
    local_wiki_file = schema_util.write_strings_to_file(output_strings, ".mediawiki")
    if not filename:
        return local_wiki_file

    directory = os.path.dirname(filename)
    if directory:
        # exist_ok=True replaces the separate os.path.exists() check, which could
        # race with another process creating the same directory.
        os.makedirs(directory, exist_ok=True)
    shutil.move(local_wiki_file, filename)
    return filename

def save_as_xml(self, filename=None, save_merged=True):
    """ Save the schema in XML format and return the path of the saved file.

    The schema is first written out through schema_util.write_xml_tree_2_xml_file;
    when filename is given, the written file is then moved to that location.

    Parameters:
        filename (str or None): If present, move the resulting file to this location.
            Missing parent directories are created.
        save_merged (bool): If true, this will save the schema as a merged schema if it is a "withStandard" schema.
            If it is not a "withStandard" schema, this setting has no effect.

    Returns:
        str: The name of the newly created schema file (filename itself when one was given).

    """
    schema2xml = HedSchema2XML()
    xml_tree = schema2xml.process_schema(self, save_merged)
    local_xml_file = schema_util.write_xml_tree_2_xml_file(xml_tree, ".xml")
    if not filename:
        return local_xml_file

    directory = os.path.dirname(filename)
    if directory:
        # exist_ok=True replaces the separate os.path.exists() check, which could
        # race with another process creating the same directory.
        os.makedirs(directory, exist_ok=True)
    shutil.move(local_xml_file, filename)
    return filename

def set_schema_prefix(self, schema_prefix):
Expand Down Expand Up @@ -691,7 +711,7 @@ def _create_empty_sections():
dictionaries[HedSectionKey.Attributes] = HedSchemaSection(HedSectionKey.Attributes)
dictionaries[HedSectionKey.UnitModifiers] = HedSchemaSection(HedSectionKey.UnitModifiers)
dictionaries[HedSectionKey.Units] = HedSchemaSection(HedSectionKey.Units)
dictionaries[HedSectionKey.UnitClasses] = HedSchemaSection(HedSectionKey.UnitClasses)
dictionaries[HedSectionKey.UnitClasses] = HedSchemaUnitClassSection(HedSectionKey.UnitClasses)
dictionaries[HedSectionKey.ValueClasses] = HedSchemaSection(HedSectionKey.ValueClasses)
dictionaries[HedSectionKey.AllTags] = HedSchemaTagSection(HedSectionKey.AllTags, case_sensitive=False)

Expand Down Expand Up @@ -768,6 +788,14 @@ def _get_attributes_for_section(self, key_class):
# ===============================================
# Semi private function used to create a schema in memory(usually from a source file)
# ===============================================
def _add_tag_to_dict(self, long_tag_name, key_class):
def _add_tag_to_dict(self, long_tag_name, new_entry, key_class):
# No reason we can't add this here always
if self.library and not self.merged:
new_entry.set_attribute_value(HedKey.InLibrary, self.library)

section = self._sections[key_class]
return section._add_to_dict(long_tag_name, new_entry)

def _create_tag_entry(self, long_tag_name, key_class):
section = self._sections[key_class]
return section._add_to_dict(long_tag_name)
return section._create_tag_entry(long_tag_name)
5 changes: 3 additions & 2 deletions hed/schema/hed_schema_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class HedKey:
ValueClass = "valueClass"
RelatedTag = "relatedTag"
SuggestedTag = "suggestedTag"
Rooted = "rooted"

# All known properties
BoolProperty = 'boolProperty'
Expand Down Expand Up @@ -66,5 +67,5 @@ class HedKey:

VERSION_ATTRIBUTE = 'version'
LIBRARY_ATTRIBUTE = 'library'
WITH_STANDARD_ATTRIBUTE = "with-standard"
MERGED_ATTRIBUTE = "merged"
WITH_STANDARD_ATTRIBUTE = "withStandard"
UNMERGED_ATTRIBUTE = "unmerged"
51 changes: 19 additions & 32 deletions hed/schema/hed_schema_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ def __init__(self, *args, **kwargs):
self._units = []
self.units = []
self.derivative_units = []
self.unit_class_entry = None

def add_unit(self, unit_entry):
""" Add the given unit entry to this unit class.
Expand Down Expand Up @@ -170,6 +169,12 @@ def finalize_entry(self, schema):
derivative_units[modifier.name + derived_unit] = unit_entry
self.derivative_units = derivative_units

def __eq__(self, other):
    """ Return True if other passes the base-class comparison and has equal units.

    Parameters:
        other: The entry to compare against; it must expose a units attribute
            whenever the base-class comparison succeeds.

    Returns:
        bool: False if the base-class __eq__ rejects other or the units differ.

    """
    if not super().__eq__(other):
        return False
    return self.units == other.units

def __hash__(self):
    """ Delegate hashing to the base class.

    Python sets __hash__ to None on any class that defines __eq__ without also
    defining __hash__, which would make instances of this class unhashable.
    NOTE(review): the base class is not visible here - this assumes it defines
    a __hash__ consistent with its own __eq__; confirm.
    """
    return super().__hash__()

class UnitEntry(HedSchemaEntry):
""" A single unit entry with modifiers in the HedSchema. """
Expand All @@ -188,7 +193,6 @@ def finalize_entry(self, schema):
"""
self.unit_modifiers = schema.get_modifiers_for_unit(self.name)


class HedTagEntry(HedSchemaEntry):
""" A single tag entry in the HedSchema. """
def __init__(self, *args, **kwargs):
Expand All @@ -202,36 +206,6 @@ def __init__(self, *args, **kwargs):
self._parent_tag = None
self.tag_terms = tuple()

@staticmethod
def get_fake_tag_entry(tag, tags_to_identify):
""" Create a tag entry if a given a tag has a match in a list of possible short tags.

Parameters:
tag (str): The short/mid/long form tag to identify.
tags_to_identify (list): A list of lowercase short tags to identify.

Returns:
tuple:
- HedTagEntry or None: The fake entry showing the short tag name as the found tag.
- str: The remaining text after the located short tag, which may be empty.

Notes:
- The match is done left to right.

"""
split_names = tag.split("/")
index = 0
for name in split_names:
if name.lower() in tags_to_identify:
fake_entry = HedTagEntry(name=tag[:index + len(name)], section=None)
fake_entry.long_tag_name = fake_entry.name
fake_entry.short_tag_name = name
return fake_entry, tag[index + len(name):]

index += len(name) + 1

return None, ""

def any_parent_has_attribute(self, attribute):
""" Check if tag (or parents) has the attribute.

Expand Down Expand Up @@ -271,6 +245,19 @@ def base_tag_has_attribute(self, tag_attribute):

return base_entry.has_attribute(tag_attribute)

@property
def parent(self):
    """ Get the parent entry of this tag.

    Returns:
        The value of the private _parent_tag attribute, which __init__ sets to None.

    """
    return self._parent_tag

@property
def parent_name(self):
    """ Get the name of this tag's parent.

    Returns:
        str: The name of the linked parent entry when one is set.  Otherwise the part of
            this entry's own name before its final "/", or "" when the name has no "/".

    """
    if self._parent_tag:
        return self._parent_tag.name
    # No parent entry is linked, so derive the parent from this entry's own name.
    return self.name.rpartition("/")[0]

def finalize_entry(self, schema):
""" Called once after schema loading to set state.

Expand Down
2 changes: 1 addition & 1 deletion hed/schema/hed_schema_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _load_schema_version(xml_version=None, xml_folder=None):
final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
hed_schema = load_schema(final_hed_xml_file)
except HedFileError as e:
if e.error_type == HedExceptions.FILE_NOT_FOUND:
if e.code == HedExceptions.FILE_NOT_FOUND:
hed_cache.cache_xml_versions(cache_folder=xml_folder)
final_hed_xml_file = hed_cache.get_hed_version_path(xml_version, library_name, xml_folder)
if not final_hed_xml_file:
Expand Down
Loading