From a7ccd3acc858ee45b90286c012f0c0742205cc3b Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Apr 2024 19:27:25 -0500 Subject: [PATCH 1/3] Implement validation with new 83 schema properties/rules --- hed/errors/error_types.py | 6 +- hed/errors/schema_error_messages.py | 18 +- hed/schema/hed_schema.py | 86 ++++---- hed/schema/hed_schema_constants.py | 20 ++ hed/schema/hed_schema_entry.py | 12 +- hed/schema/hed_schema_section.py | 10 +- hed/schema/schema_attribute_validators.py | 123 +++++------ hed/schema/schema_compliance.py | 56 +++-- hed/schema/schema_io/schema_util.py | 28 +++ hed/schema/schema_validation_util.py | 27 --- hed/validator/hed_validator.py | 3 +- tests/data/schema_tests/schema_utf8.mediawiki | 199 +++++++++--------- tests/schema/test_hed_schema.py | 13 +- .../test_schema_attribute_validators.py | 16 +- tests/schema/test_schema_entry.py | 28 +-- tests/schema/test_schema_util.py | 20 ++ tests/schema/test_schema_validation_util.py | 18 -- tests/validator/test_tag_validator.py | 12 +- 18 files changed, 358 insertions(+), 337 deletions(-) diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index c7b279ce..e6c9fc1d 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -133,17 +133,17 @@ class SchemaAttributeErrors: SCHEMA_DEPRECATED_INVALID = "SCHEMA_DEPRECATED_INVALID" SCHEMA_CHILD_OF_DEPRECATED = "SCHEMA_CHILD_OF_DEPRECATED" SCHEMA_ATTRIBUTE_VALUE_DEPRECATED = "SCHEMA_ATTRIBUTE_VALUE_DEPRECATED" - SCHEMA_SUGGESTED_TAG_INVALID = "SCHEMA_SUGGESTED_TAG_INVALID" - SCHEMA_UNIT_CLASS_INVALID = "SCHEMA_UNIT_CLASS_INVALID" - SCHEMA_VALUE_CLASS_INVALID = "SCHEMA_VALUE_CLASS_INVALID" SCHEMA_ALLOWED_CHARACTERS_INVALID = "SCHEMA_ALLOWED_CHARACTERS_INVALID" SCHEMA_IN_LIBRARY_INVALID = "SCHEMA_IN_LIBRARY_INVALID" + SCHEMA_ATTRIBUTE_NUMERIC_INVALID = "SCHEMA_ATTRIBUTE_NUMERIC_INVALID" SCHEMA_DEFAULT_UNITS_INVALID = "SCHEMA_DEFAULT_UNITS_INVALID" SCHEMA_DEFAULT_UNITS_DEPRECATED = "SCHEMA_DEFAULT_UNITS_DEPRECATED" 
SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE = "SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE" + SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID = "SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID" + class DefinitionErrors: # These are all DEFINITION_INVALID errors diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index 6a794059..4995abc9 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -74,22 +74,16 @@ def schema_error_SCHEMA_ATTRIBUTE_VALUE_DEPRECATED(tag, deprecated_suggestion, a f"and an alternative method of tagging should be used.") -@hed_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, +@hed_error(SchemaAttributeErrors.SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) -def schema_error_SCHEMA_SUGGESTED_TAG_INVALID(suggestedTag, invalidSuggestedTag, attribute_name): - return f"Tag '{suggestedTag}' has an invalid {attribute_name}: '{invalidSuggestedTag}'." +def schema_error_GENERIC_ATTRIBUTE_VALUE_INVALID(tag, invalid_value, attribute_name): + return f"Element '{tag}' has an invalid {attribute_name}: '{invalid_value}'." -@hed_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, +@hed_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_NUMERIC_INVALID, actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) -def schema_error_SCHEMA_UNIT_CLASS_INVALID(tag, unit_class, attribute_name): - return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." - - -@hed_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, - actual_code=SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_INVALID) -def schema_error_SCHEMA_VALUE_CLASS_INVALID(tag, unit_class, attribute_name): - return f"Tag '{tag}' has an invalid {attribute_name}: '{unit_class}'." +def schema_error_SCHEMA_ATTRIBUTE_NUMERIC_INVALID(tag, invalid_value, attribute_name): + return f"Element '{tag}' has an invalid {attribute_name}: '{invalid_value}'. Should be numeric." 
@hed_error(SchemaAttributeErrors.SCHEMA_DEFAULT_UNITS_INVALID, diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index 85767fa8..e0392b22 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -1,7 +1,7 @@ import json import os -from hed.schema.hed_schema_constants import HedKey, HedSectionKey +from hed.schema.hed_schema_constants import HedKey, HedSectionKey, HedKey83 from hed.schema import hed_schema_constants as constants from hed.schema.schema_io import schema_util from hed.schema.schema_io.schema2xml import Schema2XML @@ -35,6 +35,7 @@ def __init__(self): self._sections = self._create_empty_sections() self.source_format = None # The type of file this was loaded from(mediawiki, xml, or owl - None if mixed) + self._schema83 = None # Cached by schema_83_props: None until computed, then True if this is an 8.3 style schema # =============================================== # Basic schema properties @@ -71,6 +72,17 @@ def schema_namespace(self): """Returns the schema namespace prefix""" return self._namespace + @property + def schema_83_props(self): + """Returns if this is an 8.3.0 or greater schema. + + Returns: + is_83_schema(bool): True if standard or partnered schema is 8.3.0 or greater.""" + if self._schema83 is None: + self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0") + + return self._schema83 + def can_save(self): """ Returns if it's legal to save this schema. @@ -635,7 +647,7 @@ def _initialize_attributes(self, key_class): # =============================================== # Getters used to write out schema primarily. # =============================================== - def get_tag_attribute_names(self): + def get_tag_attribute_names_old(self): """ Return a dict of all allowed tag attributes. 
Returns: @@ -648,27 +660,6 @@ def get_tag_attribute_names(self): and not tag_entry.has_attribute(HedKey.UnitModifierProperty) and not tag_entry.has_attribute(HedKey.ValueClassProperty)} - def get_all_tag_attributes(self, tag_name, key_class=HedSectionKey.Tags): - """ Gather all attributes for a given tag name. - - Parameters: - tag_name (str): The name of the tag to check. - key_class (str): The type of attributes requested. e.g. Tag, Units, Unit modifiers, or attributes. - - Returns: - dict: A dictionary of attribute name and attribute value. - - Notes: - If keys is None, gets all normal hed tag attributes. - - """ - tag_entry = self._get_tag_entry(tag_name, key_class) - attributes = {} - if tag_entry: - attributes = tag_entry.attributes - - return attributes - # =============================================== # Private utility functions # =============================================== @@ -717,32 +708,39 @@ def _get_modifiers_for_unit(self, unit): valid_modifiers = self.unit_modifiers.get_entries_with_attribute(modifier_attribute_name) return valid_modifiers - def _add_element_property_attributes(self, attribute_dict): + def _add_element_property_attributes(self, attribute_dict, attribute_name): attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items() - if entry.has_attribute(HedKey.ElementProperty)} + if entry.has_attribute(attribute_name)} attribute_dict.update(attributes) def _get_attributes_for_section(self, key_class): - """ Return the valid attributes for this section. + """Return the valid attributes for this section. Parameters: key_class (HedSectionKey): The HedKey for this section. Returns: - dict or HedSchemaSection: A dict of all the attributes and this section. - + dict: A dict of all the attributes for this section. 
""" - if key_class == HedSectionKey.Tags: - return self.get_tag_attribute_names() - elif key_class == HedSectionKey.Attributes: - prop_added_dict = {key: value for key, value in self._sections[HedSectionKey.Properties].items()} - self._add_element_property_attributes(prop_added_dict) - return prop_added_dict - elif key_class == HedSectionKey.Properties: + element_prop_key = HedKey83.ElementDomain if self.schema_83_props else HedKey.ElementProperty + + # Common logic for Attributes and Properties + if key_class in [HedSectionKey.Attributes, HedSectionKey.Properties]: prop_added_dict = {} - self._add_element_property_attributes(prop_added_dict) + if key_class == HedSectionKey.Attributes: + prop_added_dict = {key: value for key, value in self._sections[HedSectionKey.Properties].items()} + self._add_element_property_attributes(prop_added_dict, element_prop_key) return prop_added_dict + + if self.schema_83_props: + attrib_classes = { + HedSectionKey.UnitClasses: HedKey83.UnitClassDomain, + HedSectionKey.Units: HedKey83.UnitDomain, + HedSectionKey.UnitModifiers: HedKey83.UnitModifierDomain, + HedSectionKey.ValueClasses: HedKey83.ValueClassDomain, + HedSectionKey.Tags: HedKey83.TagDomain + } else: attrib_classes = { HedSectionKey.UnitClasses: HedKey.UnitClassProperty, @@ -750,14 +748,18 @@ def _get_attributes_for_section(self, key_class): HedSectionKey.UnitModifiers: HedKey.UnitModifierProperty, HedSectionKey.ValueClasses: HedKey.ValueClassProperty } - attrib_class = attrib_classes.get(key_class, None) - if attrib_class is None: - return [] + if key_class == HedSectionKey.Tags: + return self.get_tag_attribute_names_old() - attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items() - if entry.has_attribute(attrib_class) or entry.has_attribute(HedKey.ElementProperty)} - return attributes + # Retrieve attributes based on the determined class + attrib_class = attrib_classes.get(key_class) + if not attrib_class: + return [] + 
attributes = {attribute: entry for attribute, entry in self._sections[HedSectionKey.Attributes].items() + if entry.has_attribute(attrib_class) or entry.has_attribute(element_prop_key)} + return attributes + # =============================================== # Semi private function used to create a schema in memory(usually from a source file) # =============================================== diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 8067fa9e..f151e46a 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -71,6 +71,26 @@ class HedKey: IsInheritedProperty = 'isInheritedProperty' +class HedKey83: + UnitClassDomain = "unitClassDomain" + UnitDomain = "unitDomain" + UnitModifierDomain = "unitModifierDomain" + ValueClassDomain = "valueClassDomain" + ElementDomain = "elementDomain" + TagDomain = "tagDomain" + AnnotationProperty = "annotationProperty" + + BoolRange = "boolRange" + + # Fully new below this + TagRange = "tagRange" + NumericRange = "numericRange" + StringRange = "stringRange" + UnitClassRange = "unitClassRange" + UnitRange = "unitRange" + ValueClassRange = "valueClassRange" + + VERSION_ATTRIBUTE = 'version' LIBRARY_ATTRIBUTE = 'library' WITH_STANDARD_ATTRIBUTE = "withStandard" diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 7b0e19b6..467a06ea 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -318,7 +318,7 @@ def _check_inherited_attribute_internal(self, attribute): return attribute_values - def _check_inherited_attribute(self, attribute, return_value=False, return_union=False): + def _check_inherited_attribute(self, attribute, return_value=False): """ Checks for the existence of an attribute in this entry and its parents. @@ -326,7 +326,6 @@ def _check_inherited_attribute(self, attribute, return_value=False, return_union attribute (str): The attribute to check for. 
return_value (bool): If True, returns the actual value of the attribute. If False, returns a boolean indicating the presence of the attribute. - return_union(bool): If True, return a union of all parent values. Returns: bool or any: Depending on the flag return_value, @@ -335,15 +334,17 @@ def _check_inherited_attribute(self, attribute, return_value=False, return_union Notes: - The existence of an attribute does not guarantee its validity. - For string attributes, the values are joined with a comma as a delimiter from all ancestors. + - For other attributes, only the value closest to the leaf is returned """ attribute_values = self._check_inherited_attribute_internal(attribute) if return_value: if not attribute_values: return None - if return_union: + try: return ",".join(attribute_values) - return attribute_values[0] + except TypeError: + return attribute_values[0] # Return the lowest level attribute if we don't want the union return bool(attribute_values) def base_tag_has_attribute(self, tag_attribute): @@ -397,8 +398,7 @@ def _finalize_inherited_attributes(self): self.inherited_attributes = self.attributes.copy() for attribute in self._section.inheritable_attributes: if self._check_inherited_attribute(attribute): - treat_as_string = not self.attribute_has_property(attribute, HedKey.BoolProperty) - self.inherited_attributes[attribute] = self._check_inherited_attribute(attribute, True, treat_as_string) + self.inherited_attributes[attribute] = self._check_inherited_attribute(attribute, True) def finalize_entry(self, schema): """ Called once after schema loading to set state. 
diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py index 8d45bcba..f27fe6c1 100644 --- a/hed/schema/hed_schema_section.py +++ b/hed/schema/hed_schema_section.py @@ -1,5 +1,5 @@ from hed.schema.hed_schema_entry import HedSchemaEntry, UnitClassEntry, UnitEntry, HedTagEntry -from hed.schema.hed_schema_constants import HedSectionKey, HedKey +from hed.schema.hed_schema_constants import HedSectionKey, HedKey, HedKey83 entries_by_section = { HedSectionKey.Properties: HedSchemaEntry, @@ -254,8 +254,12 @@ def _group_by_top_level_tag(divide_list): def _finalize_section(self, hed_schema): # Find the attributes with the inherited property attribute_section = hed_schema.attributes - self.inheritable_attributes = [name for name, value in attribute_section.items() - if value.has_attribute(HedKey.IsInheritedProperty)] + if hed_schema.schema_83_props: + self.inheritable_attributes = [name for name, value in attribute_section.items() + if not value.has_attribute(HedKey83.AnnotationProperty)] + else: + self.inheritable_attributes = [name for name, value in attribute_section.items() + if value.has_attribute(HedKey.IsInheritedProperty)] # Hardcode in extension allowed as it is critical for validation in older schemas if not self.inheritable_attributes: diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index a48c4de0..6ee7a819 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -64,93 +64,44 @@ def attribute_is_deprecated(hed_schema, tag_entry, attribute_name): return issues -# todo: This needs to be refactored, these next several functions are near identical -def tag_exists_check(hed_schema, tag_entry, attribute_name): - """Check if the list of possible tags exists in the schema. +def item_exists_check(hed_schema, tag_entry, attribute_name, section_key): + """Check if the list of possible items exists in the schema and are not deprecated. 
Parameters: hed_schema (HedSchema): The schema to use for validation tag_entry (HedSchemaEntry): The schema entry for this tag. attribute_name (str): The name of this attribute + section_key (HedSectionKey): The section this item should be in. + This is generally passed via functools.partial Returns: issues(list): A list of issues from validating this attribute. """ issues = [] - possible_tags = tag_entry.attributes.get(attribute_name, "") - split_tags = possible_tags.split(",") - for org_tag in split_tags: - org_entry = hed_schema.tags.get(org_tag) - if org_tag and not org_entry: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_SUGGESTED_TAG_INVALID, + item_values = tag_entry.attributes.get(attribute_name, "") + split_items = item_values.split(",") + + for item in split_items: + if not item: + continue + # todo: make a dict if any more added + if section_key == HedSectionKey.Tags: + item_entry = hed_schema.tags.get(item) + elif section_key == HedSectionKey.UnitClasses: + item_entry = hed_schema.unit_classes.get(item) + elif section_key == HedSectionKey.ValueClasses: + item_entry = hed_schema.value_classes.get(item) + else: + raise ValueError(f"Invalid item type: {section_key}") + + if not item_entry: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_GENERIC_ATTRIBUTE_VALUE_INVALID, tag_entry.name, - org_tag, + item, attribute_name) - elif (org_entry and org_entry.has_attribute(HedKey.DeprecatedFrom) - and not tag_entry.has_attribute(HedKey.DeprecatedFrom)): + elif item_entry.has_attribute(HedKey.DeprecatedFrom) and not tag_entry.has_attribute(HedKey.DeprecatedFrom): issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED, tag_entry.name, - org_tag, - attribute_name) - - return issues - - -def unit_class_exists(hed_schema, tag_entry, attribute_name): - """Check if comma separated list is valid unit classes. 
- - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - Returns: - issues(list): A list of issues from validating this attribute. - """ - issues = [] - possible_unit_classes = tag_entry.attributes.get(attribute_name, "") - split_tags = possible_unit_classes.split(",") - for org_tag in split_tags: - unit_class_entry = hed_schema.unit_classes.get(org_tag) - if org_tag and not unit_class_entry: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_UNIT_CLASS_INVALID, - tag_entry.name, - org_tag, - attribute_name) - elif (unit_class_entry and unit_class_entry.has_attribute(HedKey.DeprecatedFrom) - and not tag_entry.has_attribute(HedKey.DeprecatedFrom)): - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED, - tag_entry.name, - org_tag, - attribute_name) - - return issues - - -def value_class_exists(hed_schema, tag_entry, attribute_name): - """Check if comma separated list is valid value classes. - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - Returns: - issues(list): A list of issues from validating this attribute. 
- """ - issues = [] - possible_value_classes = tag_entry.attributes.get(attribute_name, "") - split_tags = possible_value_classes.split(",") - - for org_tag in split_tags: - value_class_entry = hed_schema.value_classes.get(org_tag) - if org_tag and not value_class_entry: - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_VALUE_CLASS_INVALID, - tag_entry.name, - org_tag, - attribute_name) - elif (value_class_entry and value_class_entry.has_attribute(HedKey.DeprecatedFrom) - and not tag_entry.has_attribute(HedKey.DeprecatedFrom)): - issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_VALUE_DEPRECATED, - tag_entry.name, - org_tag, + item, attribute_name) return issues @@ -305,3 +256,27 @@ def in_library_check(hed_schema, tag_entry, attribute_name): tag_entry.name, library) return issues + + +def is_numeric_value(hed_schema, tag_entry, attribute_name): + """Check if the attribute is a valid numeric(float) value + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. 
+ """ + issues = [] + + float_str = tag_entry.attributes.get(attribute_name, "") + + try: + float(float_str) + except ValueError: + issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_ATTRIBUTE_NUMERIC_INVALID, + tag_entry.name, + float_str, + attribute_name) + return issues diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index 4549b1f4..703cc7cb 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -2,11 +2,12 @@ from hed.errors.error_types import ErrorContext, SchemaErrors, ErrorSeverity, SchemaAttributeErrors, SchemaWarnings from hed.errors.error_reporter import ErrorHandler, sort_issues -from hed.schema.hed_schema import HedSchema, HedKey, HedSectionKey +from hed.schema.hed_schema import HedSchema, HedKey, HedSectionKey, HedKey83 from hed.schema import schema_attribute_validators from hed.schema.schema_validation_util import validate_schema_tag_new, validate_schema_term_new, \ - schema_version_greater_equal, get_allowed_characters_by_name, get_problem_indexes, validate_schema_description_new + get_allowed_characters_by_name, get_problem_indexes, validate_schema_description_new from hed.schema.schema_validation_util_deprecated import validate_schema_tag, validate_schema_description, verify_no_brackets +from functools import partial def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handler=None): @@ -47,13 +48,13 @@ def check_compliance(hed_schema, check_for_warnings=True, name=None, error_handl class SchemaValidator: """Validator class to wrap some code. 
In general, just call check_compliance.""" - attribute_validators = { - HedKey.SuggestedTag: [schema_attribute_validators.tag_exists_check], - HedKey.RelatedTag: [schema_attribute_validators.tag_exists_check], + attribute_validators_old = { + HedKey.SuggestedTag: [partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.Tags)], + HedKey.RelatedTag: [partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.Tags)], HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check, - schema_attribute_validators.unit_class_exists], + partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.UnitClasses)], HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check, - schema_attribute_validators.value_class_exists], + partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.ValueClasses)], # Rooted tag is implicitly verified on loading # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check], HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check], @@ -62,12 +63,27 @@ class SchemaValidator: HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor], HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check], HedKey.InLibrary: [schema_attribute_validators.in_library_check] - } # Known attribute validators + } # Known attribute validators( < 8.3.0) + + attribute_validators = { + HedKey.SuggestedTag: [], + HedKey.RelatedTag: [], + HedKey.UnitClass: [schema_attribute_validators.tag_is_placeholder_check], + HedKey.ValueClass: [schema_attribute_validators.tag_is_placeholder_check], + # Rooted tag is implicitly verified on loading + # HedKey.Rooted: [schema_attribute_validators.tag_exists_base_schema_check], + HedKey.DeprecatedFrom: [schema_attribute_validators.tag_is_deprecated_check], + HedKey.TakesValue: [schema_attribute_validators.tag_is_placeholder_check], + HedKey.DefaultUnits: [], + 
HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor], + HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check], + HedKey.InLibrary: [schema_attribute_validators.in_library_check] + } # Known attribute validators ( > 8.3.0). Does not include range or domain validation, that's added later. def __init__(self, hed_schema, error_handler): self.hed_schema = hed_schema self.error_handler = error_handler - self._new_character_validation = schema_version_greater_equal(self.hed_schema, "8.3.0") + self._new_character_validation = hed_schema.schema_83_props def check_prologue_epilogue(self): issues = [] @@ -100,9 +116,25 @@ def check_attributes(self): attribute_name, source_tag=tag_entry.name) for attribute_name in tag_entry.attributes: - # Always check deprecated - validators = self.attribute_validators.get(attribute_name, []) \ - + [schema_attribute_validators.attribute_is_deprecated] + if self._new_character_validation: + validators = self.attribute_validators.get(attribute_name, []) \ + + [schema_attribute_validators.attribute_is_deprecated] + attribute_entry = self.hed_schema.get_tag_entry(attribute_name, HedSectionKey.Attributes) + if attribute_entry: + range_validators = { + HedKey83.TagRange: [partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.Tags)], + HedKey83.NumericRange: [schema_attribute_validators.is_numeric_value], + HedKey83.StringRange: [], # Unclear what validation should be done here. 
+ HedKey83.UnitClassRange: [partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.UnitClasses)], + HedKey83.UnitRange: [schema_attribute_validators.unit_exists], + HedKey83.ValueClassRange: [partial(schema_attribute_validators.item_exists_check, section_key=HedSectionKey.ValueClasses)] + } + for range_attribute in attribute_entry.attributes: + validators += range_validators.get(range_attribute, []) + else: + # Always check deprecated + validators = self.attribute_validators_old.get(attribute_name, []) \ + + [schema_attribute_validators.attribute_is_deprecated] for validator in validators: self.error_handler.push_error_context(ErrorContext.SCHEMA_ATTRIBUTE, attribute_name) new_issues = validator(self.hed_schema, tag_entry, attribute_name) diff --git a/hed/schema/schema_io/schema_util.py b/hed/schema/schema_io/schema_util.py index dba0d365..df653ae9 100644 --- a/hed/schema/schema_io/schema_util.py +++ b/hed/schema/schema_io/schema_util.py @@ -5,6 +5,7 @@ import urllib.request from xml.dom import minidom from xml.etree import ElementTree +from semantic_version import Version # you can fill this in locally if you don't want to add it to environ. github_api_access_token = "" @@ -84,3 +85,30 @@ def xml_element_2_str(elem): rough_string = ElementTree.tostring(elem, method='xml') parsed = minidom.parseString(rough_string) return parsed.toprettyxml(indent=" ") + + +def schema_version_greater_equal(hed_schema, target_version): + """ Check if the given schema standard version is above target version + + Parameters: + hed_schema (HedSchema or HedSchemaGroup): If a schema group, checks if any version is above. + target_version (str): The semantic version to check against + + Returns: + bool: True if the version is above target_version + False if it is not, or it is ambiguous. 
+ """ + # Do exhaustive checks for now, assuming nothing + schemas = [hed_schema.schema_for_namespace(schema_namespace) for schema_namespace in hed_schema.valid_prefixes] + candidate_versions = [schema.with_standard for schema in schemas if schema.with_standard] + if not candidate_versions: + # Check for a standard schema(potentially, but unlikely, more than one) + for schema in schemas: + if schema.library == "": + candidate_versions.append(schema.version_number) + target_version = Version(target_version) + for version in candidate_versions: + if Version(version) >= target_version: + return True + + return False \ No newline at end of file diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index fb7a6fee..a6f17798 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -75,33 +75,6 @@ def validate_schema_description_new(hed_entry): return issues_list -def schema_version_greater_equal(hed_schema, target_version): - """ Check if the given schema standard version is above target version - - Parameters: - hed_schema (HedSchema or HedSchemaGroup): If a schema group, checks if any version is above. - target_version (str): The semantic version to check against - - Returns: - bool: True if the version is above target_version - False if it is not, or it is ambiguous. 
- """ - # Do exhaustive checks for now, assuming nothing - schemas = [hed_schema.schema_for_namespace(schema_namespace) for schema_namespace in hed_schema.valid_prefixes] - candidate_versions = [schema.with_standard for schema in schemas if schema.with_standard] - if not candidate_versions: - # Check for a standard schema(potentially, but unlikely, more than one) - for schema in schemas: - if schema.library == "": - candidate_versions.append(schema.version_number) - target_version = Version(target_version) - for version in candidate_versions: - if Version(version) >= target_version: - return True - - return False - - def schema_version_for_library(hed_schema, library_name): """ Given the library name and hed schema object, return the version diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index ce21e71e..55c7f705 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -10,7 +10,6 @@ from hed.validator.def_validator import DefValidator from hed.validator.tag_util import UnitValueValidator, CharValidator, StringValidator, TagValidator, GroupValidator -from hed.schema.schema_validation_util import schema_version_greater_equal from hed.schema import HedSchema @@ -33,7 +32,7 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed - self._validate_characters = schema_version_greater_equal(hed_schema, "8.3.0") + self._validate_characters = hed_schema.schema_83_props self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters) self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters) diff --git a/tests/data/schema_tests/schema_utf8.mediawiki b/tests/data/schema_tests/schema_utf8.mediawiki index 2cc5f437..dc86297f 100644 --- a/tests/data/schema_tests/schema_utf8.mediawiki +++ b/tests/data/schema_tests/schema_utf8.mediawiki @@ 
-12,24 +12,25 @@ HED version="8.3.0" unmerged="True" * # {takesValue, valueClass=textClass} '''NonAscii''' - * # {takesValue, valueClass=testUnicodeClass} + * # {takesValue, valueClass=testAllowTabClass} !# end schema + '''Unit classes''' [Unit classes and the units for the nodes.] * accelerationUnits {defaultUnits=m-per-s^2} -** m-per-s^2 {SIUnit, unitSymbol, conversionFactor=1.0} +** m-per-s^2 {SIUnit, unitSymbol, conversionFactor=1.0, allowedCharacter=caret} * angleUnits {defaultUnits=radian} ** radian {SIUnit, conversionFactor=1.0} ** rad {SIUnit, unitSymbol, conversionFactor=1.0} ** degree {conversionFactor=0.0174533} * areaUnits {defaultUnits=m^2} -** m^2 {SIUnit, unitSymbol, conversionFactor=1.0} +** m^2 {SIUnit, unitSymbol, conversionFactor=1.0, allowedCharacter=caret} * currencyUnits {defaultUnits=$}[Units indicating the worth of something.] ** dollar {conversionFactor=1.0} -** $ {unitPrefix, unitSymbol, conversionFactor=1.0} -** euro -** point +** $ {unitPrefix, unitSymbol, conversionFactor=1.0, allowedCharacter=dollar} +** euro [The official currency of a large subset of member countries of the European Union.] +** point [An arbitrary unit of value, usually an integer indicating reward or penalty.] * electricPotentialUnits {defaultUnits=uv} ** v {SIUnit, unitSymbol, conversionFactor=0.000001} ** Volt {SIUnit, conversionFactor=0.000001} @@ -41,10 +42,10 @@ HED version="8.3.0" unmerged="True" ** candela {SIUnit}[Units used to express light intensity.] ** cd {SIUnit, unitSymbol}[Units used to express light intensity.] * jerkUnits {defaultUnits=m-per-s^3} -** m-per-s^3 {unitSymbol, conversionFactor=1.0} -* magneticFieldUnits {defaultUnits=fT}[Units used to magnetic field intensity.] 
-** tesla {SIUnit, conversionFactor=10^-15} -** T {SIUnit, unitSymbol, conversionFactor=10^-15} +** m-per-s^3 {unitSymbol, conversionFactor=1.0, allowedCharacter=caret} +* magneticFieldUnits {defaultUnits=T} +** tesla {SIUnit, conversionFactor=10e-15} +** T {SIUnit, unitSymbol, conversionFactor=10e-15} * memorySizeUnits {defaultUnits=B} ** byte {SIUnit, conversionFactor=1.0} ** B {SIUnit, unitSymbol, conversionFactor=1.0} @@ -59,8 +60,9 @@ HED version="8.3.0" unmerged="True" ** m-per-s {SIUnit, unitSymbol, conversionFactor=1.0} ** mph {unitSymbol, conversionFactor=0.44704} ** kph {unitSymbol, conversionFactor=0.277778} -* temperatureUnits {defaultUnits=degree Celsius} -** degree Celsius {SIUnit, conversionFactor=1.0} +* temperatureUnits {defaultUnits=degree-Celsius} +** degree-Celsius {SIUnit, conversionFactor=1.0} +** degree Celsius {deprecatedFrom=8.2.0, SIUnit, conversionFactor=1.0}[Units are not allowed to have spaces. Use degree-Celsius or oC instead.] ** oC {SIUnit, unitSymbol, conversionFactor=1.0} * timeUnits {defaultUnits=s} ** second {SIUnit, conversionFactor=1.0} @@ -68,8 +70,9 @@ HED version="8.3.0" unmerged="True" ** day {conversionFactor=86400} ** minute {conversionFactor=60} ** hour {conversionFactor=3600}[Should be in 24-hour format.] +** year [Years do not have a constant conversion factor to seconds.] * volumeUnits {defaultUnits=m^3} -** m^3 {SIUnit, unitSymbol, conversionFactor=1.0} +** m^3 {SIUnit, unitSymbol, conversionFactor=1.0, allowedCharacter=caret} * weightUnits {defaultUnits=g} ** g {SIUnit, unitSymbol, conversionFactor=1.0} ** gram {SIUnit, conversionFactor=1.0} @@ -78,91 +81,99 @@ HED version="8.3.0" unmerged="True" '''Unit modifiers''' [Unit multiples and submultiples.] -* deca {SIUnitModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] -* da {SIUnitSymbolModifier, conversionFactor=10.0} [SI unit multiple representing 10^1.] -* hecto {SIUnitModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] 
-* h {SIUnitSymbolModifier, conversionFactor=100.0} [SI unit multiple representing 10^2.] -* kilo {SIUnitModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] -* k {SIUnitSymbolModifier, conversionFactor=1000.0} [SI unit multiple representing 10^3.] -* mega {SIUnitModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] -* M {SIUnitSymbolModifier, conversionFactor=10^6} [SI unit multiple representing 10^6.] -* giga {SIUnitModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] -* G {SIUnitSymbolModifier, conversionFactor=10^9} [SI unit multiple representing 10^9.] -* tera {SIUnitModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.] -* T {SIUnitSymbolModifier, conversionFactor=10^12} [SI unit multiple representing 10^12.] -* peta {SIUnitModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.] -* P {SIUnitSymbolModifier, conversionFactor=10^15} [SI unit multiple representing 10^15.] -* exa {SIUnitModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.] -* E {SIUnitSymbolModifier, conversionFactor=10^18} [SI unit multiple representing 10^18.] -* zetta {SIUnitModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.] -* Z {SIUnitSymbolModifier, conversionFactor=10^21} [SI unit multiple representing 10^21.] -* yotta {SIUnitModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.] -* Y {SIUnitSymbolModifier, conversionFactor=10^24} [SI unit multiple representing 10^24.] -* deci {SIUnitModifier, conversionFactor=0.1}[SI unit submultiple representing 10^-1.] -* d {SIUnitSymbolModifier, conversionFactor=0.1} [SI unit submultiple representing 10^-1.] -* centi {SIUnitModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.] -* c {SIUnitSymbolModifier, conversionFactor=0.01} [SI unit submultiple representing 10^-2.] -* milli {SIUnitModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.] 
-* m {SIUnitSymbolModifier, conversionFactor=0.001} [SI unit submultiple representing 10^-3.] -* micro {SIUnitModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.] -* u {SIUnitSymbolModifier, conversionFactor=10^-6} [SI unit submultiple representing 10^-6.] -* nano {SIUnitModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.] -* n {SIUnitSymbolModifier, conversionFactor=10^-9} [SI unit submultiple representing 10^-9.] -* pico {SIUnitModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.] -* p {SIUnitSymbolModifier, conversionFactor=10^-12} [SI unit submultiple representing 10^-12.] -* femto {SIUnitModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.] -* f {SIUnitSymbolModifier, conversionFactor=10^-15} [SI unit submultiple representing 10^-15.] -* atto {SIUnitModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.] -* a {SIUnitSymbolModifier, conversionFactor=10^-18} [SI unit submultiple representing 10^-18.] -* zepto {SIUnitModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.] -* z {SIUnitSymbolModifier, conversionFactor=10^-21} [SI unit submultiple representing 10^-21.] -* yocto {SIUnitModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.] -* y {SIUnitSymbolModifier, conversionFactor=10^-24} [SI unit submultiple representing 10^-24.] - - -'''Value classes''' [Specification of the rules for the values provided by users.] -* dateTimeClass {allowedCharacter=digits,allowedCharacter=T,allowedCharacter=-,allowedCharacter=:}[Date-times should conform to ISO8601 date-time format YYYY-MM-DDThh:mm:ss. Any variation on the full form is allowed.] -* nameClass {allowedCharacter=letters,allowedCharacter=digits,allowedCharacter=_,allowedCharacter=-}[Value class designating values that have the characteristics of node names. The allowed characters are alphanumeric, hyphen, and underbar.] 
-* numericClass {allowedCharacter=digits,allowedCharacter=E,allowedCharacter=e,allowedCharacter=+,allowedCharacter=-,allowedCharacter=.}[Value must be a valid numerical value.] -* posixPath {allowedCharacter=digits,allowedCharacter=letters,allowedCharacter=/,allowedCharacter=:}[Posix path specification.] -* textClass {allowedCharacter=letters, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Value class designating values that have the characteristics of text such as in descriptions.] -* testUnicodeClass {allowedCharacter=letters, allowedCharacter=nonascii, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Test class to see if unicode is allowed] +* deca {SIUnitModifier, conversionFactor=10.0} [SI unit multiple representing 10e1.] +* da {SIUnitSymbolModifier, conversionFactor=10.0} [SI unit multiple representing 10e1.] +* hecto {SIUnitModifier, conversionFactor=100.0} [SI unit multiple representing 10e2.] +* h {SIUnitSymbolModifier, conversionFactor=100.0} [SI unit multiple representing 10e2.] +* kilo {SIUnitModifier, conversionFactor=1000.0} [SI unit multiple representing 10e3.] +* k {SIUnitSymbolModifier, conversionFactor=1000.0} [SI unit multiple representing 10e3.] +* mega {SIUnitModifier, conversionFactor=10e6} [SI unit multiple representing 10e6.] +* M {SIUnitSymbolModifier, conversionFactor=10e6} [SI unit multiple representing 10e6.] +* giga {SIUnitModifier, conversionFactor=10e9} [SI unit multiple representing 10e9.] 
+* G {SIUnitSymbolModifier, conversionFactor=10e9} [SI unit multiple representing 10e9.] +* tera {SIUnitModifier, conversionFactor=10e12} [SI unit multiple representing 10e12.] +* T {SIUnitSymbolModifier, conversionFactor=10e12} [SI unit multiple representing 10e12.] +* peta {SIUnitModifier, conversionFactor=10e15} [SI unit multiple representing 10e15.] +* P {SIUnitSymbolModifier, conversionFactor=10e15} [SI unit multiple representing 10e15.] +* exa {SIUnitModifier, conversionFactor=10e18} [SI unit multiple representing 10e18.] +* E {SIUnitSymbolModifier, conversionFactor=10e18} [SI unit multiple representing 10e18.] +* zetta {SIUnitModifier, conversionFactor=10e21} [SI unit multiple representing 10e21.] +* Z {SIUnitSymbolModifier, conversionFactor=10e21} [SI unit multiple representing 10e21.] +* yotta {SIUnitModifier, conversionFactor=10e24} [SI unit multiple representing 10e24.] +* Y {SIUnitSymbolModifier, conversionFactor=10e24} [SI unit multiple representing 10e24.] +* deci {SIUnitModifier, conversionFactor=0.1}[SI unit submultiple representing 10e-1.] +* d {SIUnitSymbolModifier, conversionFactor=0.1} [SI unit submultiple representing 10e-1.] +* centi {SIUnitModifier, conversionFactor=0.01} [SI unit submultiple representing 10e-2.] +* c {SIUnitSymbolModifier, conversionFactor=0.01} [SI unit submultiple representing 10e-2.] +* milli {SIUnitModifier, conversionFactor=0.001} [SI unit submultiple representing 10e-3.] +* m {SIUnitSymbolModifier, conversionFactor=0.001} [SI unit submultiple representing 10e-3.] +* micro {SIUnitModifier, conversionFactor=10e-6} [SI unit submultiple representing 10e-6.] +* u {SIUnitSymbolModifier, conversionFactor=10e-6} [SI unit submultiple representing 10e-6.] +* nano {SIUnitModifier, conversionFactor=10e-9} [SI unit submultiple representing 10e-9.] +* n {SIUnitSymbolModifier, conversionFactor=10e-9} [SI unit submultiple representing 10e-9.] +* pico {SIUnitModifier, conversionFactor=10e-12} [SI unit submultiple representing 10e-12.] 
+* p {SIUnitSymbolModifier, conversionFactor=10e-12} [SI unit submultiple representing 10e-12.] +* femto {SIUnitModifier, conversionFactor=10e-15} [SI unit submultiple representing 10e-15.] +* f {SIUnitSymbolModifier, conversionFactor=10e-15} [SI unit submultiple representing 10e-15.] +* atto {SIUnitModifier, conversionFactor=10e-18} [SI unit submultiple representing 10e-18.] +* a {SIUnitSymbolModifier, conversionFactor=10e-18} [SI unit submultiple representing 10e-18.] +* zepto {SIUnitModifier, conversionFactor=10e-21} [SI unit submultiple representing 10e-21.] +* z {SIUnitSymbolModifier, conversionFactor=10e-21} [SI unit submultiple representing 10e-21.] +* yocto {SIUnitModifier, conversionFactor=10e-24} [SI unit submultiple representing 10e-24.] +* y {SIUnitSymbolModifier, conversionFactor=10e-24} [SI unit submultiple representing 10e-24.] + + +'''Value classes''' [Rules for the values provided for placeholders.] +* dateTimeClass {allowedCharacter=digits,allowedCharacter=T,allowedCharacter=hyphen,allowedCharacter=colon}[Date-times should conform to ISO8601 date-time format YYYY-MM-DDThh:mm:ss.000000Z (year, month, day, hour (24h), minute, second, optional fractional seconds, and optional UTC time indicator. Any variation on the full form is allowed.] +* nameClass {allowedCharacter=letters,allowedCharacter=digits,allowedCharacter=underscore,allowedCharacter=hyphen}[Value class designating values that have the characteristics of node names. The allowed characters are alphanumeric, hyphen, and underscore.] +* numericClass {allowedCharacter=digits,allowedCharacter=E,allowedCharacter=e,allowedCharacter=plus,allowedCharacter=hyphen,allowedCharacter=period}[Value must be a valid numerical value.] +* posixPath {allowedCharacter=digits,allowedCharacter=letters,allowedCharacter=slash,allowedCharacter=colon}[Posix path specification.] +* textClass {allowedCharacter=text}[Values that have the characteristics of text such as in descriptions. 
The text characters include printable characters (32 <= ASCII < 127) excluding comma, square bracket and curly braces as well as nonascii (ASCII codes > 127).] +* testAllowTabClass {allowedCharacter=text, allowedCharacter=tab}[Test class to see if tab is allowed] + '''Schema attributes''' [Allowed attribute modifiers of other sections of the schema.] -* allowedCharacter {valueClassProperty}[A schema attribute of value classes specifying a special character that is allowed in expressing the value of a placeholder. Normally the allowed characters are listed individually. However, the word letters designates the upper and lower case alphabetic characters and the word digits designates the digits 0-9. The word blank designates the blank character.] -* conversionFactor {unitProperty, unitModifierProperty}[The multiplicative factor to multiply these units to convert to default units.] -* deprecatedFrom {elementProperty}[Indicates that this element is deprecated. The value of the attribute is the latest schema version in which the element appeared in undeprecated form.] -* defaultUnits {unitClassProperty}[A schema attribute of unit classes specifying the default units to use if the placeholder has a unit class but the substituted value has no units.] -* extensionAllowed {boolProperty, nodeProperty, isInheritedProperty}[A schema attribute indicating that users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.] -* inLibrary {elementProperty} [Indicates this schema element came from the named library schema, not the standard schema. This attribute is added by tools when a library schema is merged into its partnered standard schema.] -* recommended {boolProperty, nodeProperty}[A schema attribute indicating that the event-level HED string should include this tag.] 
-* relatedTag {nodeProperty, isInheritedProperty}[A schema attribute suggesting HED tags that are closely related to this tag. This attribute is used by tagging tools.] -* requireChild {boolProperty, nodeProperty}[A schema attribute indicating that one of the node elements descendants must be included when using this tag.] -* required {boolProperty, nodeProperty}[A schema attribute indicating that every event-level HED string should include this tag.] -* reserved {boolProperty, nodeProperty}[A schema attribute indicating that this tag has special meaning and requires special handling by tools.] -* rooted {nodeProperty}[Indicates a top-level library schema node is identical to a node of the same name in the partnered standard schema. This attribute can only appear in nodes that have the inLibrary schema attribute.] -* SIUnit {boolProperty, unitProperty}[A schema attribute indicating that this unit element is an SI unit and can be modified by multiple and submultiple names. Note that some units such as byte are designated as SI units although they are not part of the standard.] -* SIUnitModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a base unit rather than a unit symbol.] -* SIUnitSymbolModifier {boolProperty, unitModifierProperty}[A schema attribute indicating that this SI unit modifier represents a multiple or submultiple of a unit symbol rather than a base symbol.] -* suggestedTag {nodeProperty, isInheritedProperty}[A schema attribute that indicates another tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.] -* tagGroup {boolProperty, nodeProperty}[A schema attribute indicating the tag can only appear inside a tag group.] -* takesValue {boolProperty, nodeProperty}[A schema attribute indicating the tag is a hashtag placeholder that is expected to be replaced with a user-defined value.] 
-* topLevelTagGroup {boolProperty, nodeProperty}[A schema attribute indicating that this tag (or its descendants) can only appear in a top-level tag group. A tag group can have at most one tag with this attribute.] -* unique {boolProperty, nodeProperty}[A schema attribute indicating that only one of this tag or its descendants can be used in the event-level HED string.] -* unitClass {nodeProperty}[A schema attribute specifying which unit class this value tag belongs to.] -* unitPrefix {boolProperty, unitProperty}[A schema attribute applied specifically to unit elements to designate that the unit indicator is a prefix (e.g., dollar sign in the currency units).] -* unitSymbol {boolProperty, unitProperty}[A schema attribute indicating this tag is an abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.] -* valueClass {nodeProperty}[A schema attribute specifying which value class this value tag belongs to.] +* allowedCharacter {unitDomain, unitModifierDomain, valueClassDomain, stringRange}[A special character that is allowed in expressing the value of a placeholder of a specified value class. Allowed characters may be listed individual, named individually, or named as a group as specified in Section 2.2 Character sets and restrictions of the HED specification.] +* conversionFactor {unitDomain, unitModifierDomain, numericRange}[The factor to multiply these units or unit modifiers by to convert to default units.] +* defaultUnits {unitClassDomain, unitRange}[The default units to use if the placeholder has a unit class but the substituted value has no units.] +* deprecatedFrom {elementDomain, stringRange}[The latest schema version in which the element was not deprecated.] +* extensionAllowed {tagDomain, boolRange}[Users can add unlimited levels of child nodes under this tag. This tag is propagated to child nodes with the exception of the hashtag placeholders.] 
+* hedId {annotationProperty, elementDomain, stringRange}[The unique identifier of this element in the HED namespace.] +* inLibrary {elementDomain, stringRange} [The named library schema that this schema element is from. This attribute is added by tools when a library schema is merged into its partnered standard schema.] +* relatedTag {tagDomain, tagRange}[A HED tag that is closely related to this tag. This attribute is used by tagging tools.] +* requireChild {annotationProperty, tagDomain, boolRange}[This tag must have a descendent.] +* reserved {tagDomain, boolRange}[This tag has special meaning and requires special handling by tools.] +* rooted {annotationProperty, tagDomain, tagRange}[This top-level library schema node should have a parent which is the indicated node in the partnered standard schema.] +* SIUnit {unitDomain, boolRange}[This unit element is an SI unit and can be modified by multiple and sub-multiple names. Note that some units such as byte are designated as SI units although they are not part of the standard.] +* SIUnitModifier {unitModifierDomain, boolRange}[This SI unit modifier represents a multiple or sub-multiple of a base unit rather than a unit symbol.] +* SIUnitSymbolModifier {unitModifierDomain, boolRange}[This SI unit modifier represents a multiple or sub-multiple of a unit symbol rather than a base symbol.] +* suggestedTag {tagDomain, stringRange}[A tag that is often associated with this tag. This attribute is used by tagging tools to provide tagging suggestions.] +* tagGroup {tagDomain, boolRange}[This tag can only appear inside a tag group.] +* takesValue {annotationProperty, tagDomain, boolRange}[This tag is a hashtag placeholder that is expected to be replaced with a user-defined value.] +* topLevelTagGroup {tagDomain, boolRange}[This tag (or its descendants) can only appear in a top-level tag group. There are additional tag-specific restrictions on what other tags can appear in the group with this tag.] 
+* unique {tagDomain,boolRange}[Only one of this tag or its descendants can be used in the event-level HED string.] +* unitClass {tagDomain, unitClassRange}[The unit class that the value of a placeholder node can belong to.] +* unitPrefix {unitDomain, boolRange}[This unit is a prefix unit (e.g., dollar sign in the currency units).] +* unitSymbol {unitDomain, boolRange}[This tag is an abbreviation or symbol representing a type of unit. Unit symbols represent both the singular and the plural and thus cannot be pluralized.] +* valueClass {tagDomain, valueClassRange}[Type of value taken on by the value of a placeholder node.] +* testUnicodeClass {allowedCharacter=letters, allowedCharacter=nonascii, allowedCharacter=digits, allowedCharacter=blank, allowedCharacter=+, allowedCharacter=-, allowedCharacter=:, allowedCharacter=;, allowedCharacter=., allowedCharacter=/, allowedCharacter=(, allowedCharacter=), allowedCharacter=?, allowedCharacter=*, allowedCharacter=%, allowedCharacter=$, allowedCharacter=@}[Test class to see if unicode is allowed] '''Properties''' [Properties of the schema attributes themselves. These are used for schema handling and verification.] -* boolProperty [Indicates that the schema attribute represents something that is either true or false and does not have a value. Attributes without this value are assumed to have string values.] -* elementProperty [Indicates this schema attribute can apply to any type of element(tag term, unit class, etc).] -* isInheritedProperty [Indicates that this attribute is inherited by child nodes. This property only applies to schema attributes for nodes.] -* nodeProperty [Indicates this schema attribute applies to node (tag-term) elements. This was added to allow for an attribute to apply to multiple elements.] -* unitClassProperty [Indicates that the schema attribute is meant to be applied to unit classes.] -* unitModifierProperty [Indicates that the schema attribute is meant to be applied to unit modifier classes.] 
-* unitProperty [Indicates that the schema attribute is meant to be applied to units within a unit class.] -* valueClassProperty [Indicates that the schema attribute is meant to be applied to value classes.] +* annotationProperty [This schema attribute is inherited by child nodes. This property only applies to schema attributes for nodes.] +* boolRange [This schema attribute's value can be true or false. This property was formerly named boolProperty.] +* elementDomain [This schema attribute can apply to any type of element (tag term, unit class, etc). This property was formerly named elementProperty.] +* tagDomain [This schema attribute can apply to node (tag-term) elements. This was added so attributes could apply to multiple types of elements. This property was formerly named nodeProperty.] +* tagRange [This schema attribute's value can be a node. This property was formerly named nodeProperty.] +* numericRange [This schema attribute's value can be numeric.] +* stringRange [This schema attribute's value can be a string.] +* unitClassDomain [This schema attribute can apply to unit classes. This property was formerly named unitClassProperty.] +* unitClassRange [This schema attribute's value can be a unit class.] +* unitModifierDomain [This schema attribute can apply to unit modifiers. This property was formerly named unitModifierProperty.] +* unitDomain [This schema attribute can apply to units. This property was formerly named unitProperty.] +* unitRange [This schema attribute's value can be units.] +* valueClassDomain [This schema attribute can apply to value classes. This property was formerly named valueClassProperty.] +* valueClassRange [This schema attribute's value can be a value class.] + '''Epilogue''' diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py index 21fcd098..5f499a0e 100644 --- a/tests/schema/test_hed_schema.py +++ b/tests/schema/test_hed_schema.py @@ -84,8 +84,7 @@ def test_tag_attribute(self): 'Test string: %s. 
Attribute: %s.' % (test_string, attribute)) def test_get_all_tag_attributes(self): - test_string = HedString("Jerk-rate/#", self.hed_schema_3g) - tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string) + tag_props = self.hed_schema_3g._get_tag_entry("Jerk-rate/#").attributes expected_props = { "takesValue": "true", "valueClass": "numericClass", @@ -93,19 +92,13 @@ def test_get_all_tag_attributes(self): } self.assertCountEqual(tag_props, expected_props) - tag_props = self.hed_schema_3g.get_all_tag_attributes("This/Is/Not/A/Tag") - expected_props = { - } - self.assertCountEqual(tag_props, expected_props) - - test_string = HedString("Statistical-value", self.hed_schema_3g) - tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string) + tag_props = self.hed_schema_3g._get_tag_entry("Statistical-value").attributes expected_props = { HedKey.ExtensionAllowed: "true", } self.assertCountEqual(tag_props, expected_props) # also test long form. - tag_props = self.hed_schema_3g.get_all_tag_attributes("Property/Data-property/Data-value/Statistical-value") + tag_props = self.hed_schema_3g._get_tag_entry("Property/Data-property/Data-value/Statistical-value").attributes self.assertCountEqual(tag_props, expected_props) def test_get_hed_xml_version(self): diff --git a/tests/schema/test_schema_attribute_validators.py b/tests/schema/test_schema_attribute_validators.py index afa2484a..9d9a6bf1 100644 --- a/tests/schema/test_schema_attribute_validators.py +++ b/tests/schema/test_schema_attribute_validators.py @@ -1,7 +1,7 @@ import unittest import copy -from hed.schema import schema_attribute_validators +from hed.schema import schema_attribute_validators, HedSectionKey from hed import load_schema_version @@ -21,12 +21,12 @@ def test_util_placeholder(self): def test_util_suggested(self): tag_entry = self.hed_schema.tags["Event/Sensory-event"] attribute_name = "suggestedTag" - self.assertFalse(schema_attribute_validators.tag_exists_check(self.hed_schema, tag_entry, 
attribute_name)) + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) tag_entry = self.hed_schema.tags["Property"] - self.assertFalse(schema_attribute_validators.tag_exists_check(self.hed_schema, tag_entry, attribute_name)) + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) tag_entry = copy.deepcopy(tag_entry) tag_entry.attributes["suggestedTag"] = "InvalidSuggestedTag" - self.assertTrue(schema_attribute_validators.tag_exists_check(self.hed_schema, tag_entry, attribute_name)) + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.Tags)) def test_util_rooted(self): tag_entry = self.hed_schema.tags["Event"] @@ -44,20 +44,20 @@ def test_util_rooted(self): def test_unit_class_exists(self): tag_entry = self.hed_schema.tags["Weight/#"] attribute_name = "unitClass" - self.assertFalse(schema_attribute_validators.unit_class_exists(self.hed_schema, tag_entry, attribute_name)) + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.UnitClasses)) tag_entry = copy.deepcopy(tag_entry) tag_entry.attributes["unitClass"] = "fakeClass" - self.assertTrue(schema_attribute_validators.unit_class_exists(self.hed_schema, tag_entry, attribute_name)) + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.UnitClasses)) def test_value_class_exists(self): tag_entry = self.hed_schema.tags["Weight/#"] attribute_name = "valueClass" - self.assertFalse(schema_attribute_validators.value_class_exists(self.hed_schema, tag_entry, attribute_name)) + self.assertFalse(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.ValueClasses)) tag_entry = copy.deepcopy(tag_entry) tag_entry.attributes["valueClass"] 
= "fakeClass" - self.assertTrue(schema_attribute_validators.value_class_exists(self.hed_schema, tag_entry, attribute_name)) + self.assertTrue(schema_attribute_validators.item_exists_check(self.hed_schema, tag_entry, attribute_name, HedSectionKey.ValueClasses)) def test_unit_exists(self): tag_entry = self.hed_schema.unit_classes["accelerationUnits"] diff --git a/tests/schema/test_schema_entry.py b/tests/schema/test_schema_entry.py index 0985ce2b..4cc23c10 100644 --- a/tests/schema/test_schema_entry.py +++ b/tests/schema/test_schema_entry.py @@ -19,39 +19,27 @@ def setUp(self): self.child_entry2 = MockEntry({'size': 'medium', 'material': 'wood', 'number': 5}, parent=self.child_entry1) def test_check_inherited_attribute(self): - # Test attribute present in the current entry but not in parents - self.assertEqual(self.child_entry2._check_inherited_attribute('material', return_value=True, return_union=False), 'wood') - - # Test attribute present in the parent but not in the current entry - self.assertEqual(self.child_entry2._check_inherited_attribute('color', return_value=True, return_union=False), 'green') + self.assertEqual(self.child_entry2._check_inherited_attribute('material', return_value=True), 'wood') # Test attribute present in the parent but not in the current entry, treat_as_string=True - self.assertEqual(self.child_entry2._check_inherited_attribute('color', return_value=True, return_union=True), 'green,blue') + self.assertEqual(self.child_entry2._check_inherited_attribute('color', return_value=True), 'green,blue') # Test attribute present in the current entry and in parents, treat_as_string=True - self.assertEqual(self.child_entry2._check_inherited_attribute('size', return_value=True, return_union=True), 'medium,large') + self.assertEqual(self.child_entry2._check_inherited_attribute('size', return_value=True), 'medium,large') # Test attribute not present anywhere - self.assertIsNone(self.child_entry2._check_inherited_attribute('weight', return_value=True, 
return_union=False)) + self.assertIsNone(self.child_entry2._check_inherited_attribute('weight', return_value=True)) # Test attribute present in the current entry but not in parents, no return value - self.assertTrue(self.child_entry2._check_inherited_attribute('material', return_value=False, return_union=False)) + self.assertTrue(self.child_entry2._check_inherited_attribute('material', return_value=False)) # Test attribute not present anywhere, no return value - self.assertFalse(self.child_entry2._check_inherited_attribute('weight', return_value=False, return_union=False)) + self.assertFalse(self.child_entry2._check_inherited_attribute('weight', return_value=False)) def test_check_inherited_attribute_bool(self): # Test boolean attribute present in the current entry but not in parents - self.assertTrue(self.child_entry2._check_inherited_attribute('is_round', return_value=True, return_union=False)) - - # Test boolean attribute present in the parent and in the current entry, treat_as_string=True - with self.assertRaises(TypeError): - self.child_entry2._check_inherited_attribute('is_round', return_value=True, return_union=True) + self.assertTrue(self.child_entry2._check_inherited_attribute('is_round', return_value=True)) def test_check_inherited_attribute_numeric(self): # Test numeric attribute present only in the current entry - self.assertEqual(self.child_entry2._check_inherited_attribute('number', return_value=True, return_union=False), 5) - - # Test numeric attribute with treat_as_string=True should raise TypeError - with self.assertRaises(TypeError): - self.child_entry2._check_inherited_attribute('number', return_value=True, return_union=True) + self.assertEqual(self.child_entry2._check_inherited_attribute('number', return_value=True), 5) diff --git a/tests/schema/test_schema_util.py b/tests/schema/test_schema_util.py index 0fb72539..0d349846 100644 --- a/tests/schema/test_schema_util.py +++ b/tests/schema/test_schema_util.py @@ -2,6 +2,9 @@ import os from 
hed.schema.schema_io import schema_util +from hed.schema import HedSchemaGroup +from hed import load_schema_version +from hed import load_schema class Test(unittest.TestCase): @@ -17,6 +20,23 @@ def test_url_to_file(self): self.assertTrue(downloaded_file) os.remove(downloaded_file) + def test_schema_version_greater_equal(self): + schema1 = load_schema_version("8.0.0") + self.assertFalse(schema_util.schema_version_greater_equal(schema1, "8.3.0")) + + schema2 = load_schema_version("v:8.2.0") + self.assertFalse(schema_util.schema_version_greater_equal(schema2, "8.3.0")) + + schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), + '../data/schema_tests/schema_utf8.mediawiki') + schema3 = load_schema(schema_path, schema_namespace="tl:") + self.assertTrue(schema_util.schema_version_greater_equal(schema3, "8.3.0")) + + schema_group = HedSchemaGroup([schema1, schema2]) + self.assertFalse(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) + + schema_group = HedSchemaGroup([schema2, schema3]) + self.assertTrue(schema_util.schema_version_greater_equal(schema_group, "8.3.0")) if __name__ == '__main__': unittest.main() diff --git a/tests/schema/test_schema_validation_util.py b/tests/schema/test_schema_validation_util.py index d2f12633..0163d017 100644 --- a/tests/schema/test_schema_validation_util.py +++ b/tests/schema/test_schema_validation_util.py @@ -70,24 +70,6 @@ def test_validate_schema_description(self): ] self.validate_desc_base(test_descs, expected_issues) - def test_schema_version_greater_equal(self): - schema1 = load_schema_version("8.0.0") - self.assertFalse(util.schema_version_greater_equal(schema1, "8.3.0")) - - schema2 = load_schema_version("v:8.2.0") - self.assertFalse(util.schema_version_greater_equal(schema2, "8.3.0")) - - schema_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), - '../data/schema_tests/schema_utf8.mediawiki') - schema3 = load_schema(schema_path, schema_namespace="tl:") - 
self.assertTrue(util.schema_version_greater_equal(schema3, "8.3.0")) - - schema_group = HedSchemaGroup([schema1, schema2]) - self.assertFalse(util.schema_version_greater_equal(schema_group, "8.3.0")) - - schema_group = HedSchemaGroup([schema2, schema3]) - self.assertTrue(util.schema_version_greater_equal(schema_group, "8.3.0")) - def test_schema_version_for_library(self): schema1 = load_schema_version("8.0.0") self.assertEqual(util.schema_version_for_library(schema1, ""), "8.0.0") diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 38ec3ac2..5e496c09 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -1019,20 +1019,20 @@ def string_obj_func(validator): def test_special_units(self): test_strings = { 'ascii': 'Ascii/bad-date', - 'badascii': 'Ascii/bad-daté', - 'nonascii': 'Nonascii/Café', + 'illegalTab': 'Ascii/bad-dat\t', + 'allowTab': 'Nonascii/Caf\t', } expected_results = { 'ascii': True, - 'badascii': False, - 'nonascii': True + 'illegalTab': False, + 'allowTab': True } expected_issues = { 'ascii': [], - 'badascii': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=0, + 'illegalTab': self.format_error(ValidationErrors.INVALID_TAG_CHARACTER, tag=0, index_in_tag=13, index_in_tag_end=14), - 'nonascii': [] + 'allowTab': [] } self.validator_semantic(test_strings, expected_results, expected_issues, True) From 9bf38b8091f27306a59d0d612321f11d7e50ab9b Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Apr 2024 19:30:22 -0500 Subject: [PATCH 2/3] fix typos --- hed/validator/hed_validator.py | 2 +- tests/validator/test_tag_validator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index 55c7f705..26f91393 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -32,7 +32,7 @@ def __init__(self, hed_schema, def_dicts=None, definitions_allowed=False): 
self._def_validator = DefValidator(def_dicts, hed_schema) self._definitions_allowed = definitions_allowed - self._validate_characters = schema.schema_83_props + self._validate_characters = hed_schema.schema_83_props self._unit_validator = UnitValueValidator(modern_allowed_char_rules=self._validate_characters) self._char_validator = CharValidator(modern_allowed_char_rules=self._validate_characters) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 5e496c09..3fa6deeb 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -1020,7 +1020,7 @@ def test_special_units(self): test_strings = { 'ascii': 'Ascii/bad-date', 'illegalTab': 'Ascii/bad-dat\t', - 'allowTab': 'Nonascii/Caf\t', + 'allowTab': 'Nonascii/Cafe\t', } expected_results = { 'ascii': True, From 1329ebce40b31a070b04ac0f278e5a1b2d7688a9 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 3 Apr 2024 19:56:20 -0500 Subject: [PATCH 3/3] Move schema_83_props to baseclass --- hed/schema/hed_schema.py | 12 ------------ hed/schema/hed_schema_base.py | 13 +++++++++++++ 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index e0392b22..7f6fec49 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -35,7 +35,6 @@ def __init__(self): self._sections = self._create_empty_sections() self.source_format = None # The type of file this was loaded from(mediawiki, xml, or owl - None if mixed) - self._schema83 = False # If True, this is an 8.3 style schema for validation/attribute purposes # =============================================== # Basic schema properties @@ -72,17 +71,6 @@ def schema_namespace(self): """Returns the schema namespace prefix""" return self._namespace - @property - def schema_83_props(self): - """Returns if this is an 8.3.0 or greater schema. 
- - Returns: - is_83_schema(bool): True if standard or partnered schema is 8.3.0 or greater.""" - if self._schema83 is not None: - return self._schema83 - - self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0") - def can_save(self): """ Returns if it's legal to save this schema. diff --git a/hed/schema/hed_schema_base.py b/hed/schema/hed_schema_base.py index a6596e61..97aad41f 100644 --- a/hed/schema/hed_schema_base.py +++ b/hed/schema/hed_schema_base.py @@ -3,6 +3,7 @@ """ from hed.schema.hed_schema_constants import HedSectionKey from abc import ABC, abstractmethod +from hed.schema.schema_io import schema_util class HedSchemaBase(ABC): @@ -12,6 +13,7 @@ class HedSchemaBase(ABC): """ def __init__(self): self._name = "" # User provided identifier for this schema(not used for equality comparison or saved) + self._schema83 = None # If True, this is an 8.3 style schema for validation/attribute purposes pass @property @@ -25,6 +27,17 @@ def name(self): def name(self, name): self._name = name + @property + def schema_83_props(self): + """Returns if this is an 8.3.0 or greater schema. + + Returns: + is_83_schema(bool): True if standard or partnered schema is 8.3.0 or greater.""" + if self._schema83 is not None: + return self._schema83 + + self._schema83 = schema_util.schema_version_greater_equal(self, "8.3.0") + @abstractmethod def get_schema_versions(self): """ A list of HED version strings including namespace and library name if any of this schema.