From 7246f32e5d6fe1506ae575cd4b9d8f0e23bc642b Mon Sep 17 00:00:00 2001 From: IanCa Date: Tue, 19 Mar 2024 19:41:28 -0500 Subject: [PATCH 1/4] Minor cleanup/documentation cleanup of schema and models --- hed/errors/error_messages.py | 3 +- hed/errors/error_reporter.py | 26 ++----- hed/errors/error_types.py | 1 + hed/errors/schema_error_messages.py | 2 +- hed/models/base_input.py | 5 -- hed/models/definition_dict.py | 4 +- hed/models/definition_entry.py | 2 + hed/models/hed_tag.py | 5 ++ hed/models/query_util.py | 2 +- hed/models/string_util.py | 2 +- hed/schema/hed_schema_constants.py | 2 +- hed/schema/hed_schema_entry.py | 4 - hed/schema/hed_schema_section.py | 4 +- hed/schema/schema_attribute_validators.py | 95 ++++------------------- hed/schema/schema_compare.py | 3 +- hed/schema/schema_compliance.py | 2 +- 16 files changed, 43 insertions(+), 119 deletions(-) diff --git a/hed/errors/error_messages.py b/hed/errors/error_messages.py index b229c867..1fc508a3 100644 --- a/hed/errors/error_messages.py +++ b/hed/errors/error_messages.py @@ -1,5 +1,4 @@ -""" -The actual formatted error messages for each type. +"""Format templates for HED schema error messages. Add new errors here, or any other file imported after error_reporter.py. """ diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 209a2876..42b12842 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -6,7 +6,7 @@ from functools import wraps import xml.etree.ElementTree as ET -import copy + from hed.errors.error_types import ErrorContext, ErrorSeverity from hed.errors.known_error_codes import known_error_codes @@ -175,6 +175,7 @@ def wrapper(tag, *args, severity=default_severity, **kwargs): class ErrorHandler: + """Class to hold error context and having general error functions.""" def __init__(self, check_for_warnings=True): # The current (ordered) dictionary of contexts. self.error_context = [] @@ -217,9 +218,6 @@ def reset_error_context(self): """ self.error_context = [] - def get_error_context_copy(self): - return copy.copy(self.error_context) - def format_error_with_context(self, *args, **kwargs): error_object = ErrorHandler.format_error(*args, **kwargs) if self is not None: @@ -253,9 +251,9 @@ def format_error(error_type, *args, actual_error=None, **kwargs): if not error_func: error_object = ErrorHandler.val_error_unknown(*args, **kwargs) error_object['code'] = error_type - return [error_object] + else: + error_object = error_func(*args, **kwargs) - error_object = error_func(*args, **kwargs) if actual_error: error_object['code'] = actual_error @@ -294,19 +292,11 @@ def format_error_from_context(error_type, error_context, *args, actual_error=Non - This can't filter out warnings like the other ones. """ - error_func = error_functions.get(error_type) - if not error_func: - error_object = ErrorHandler.val_error_unknown(*args, **kwargs) - error_object['code'] = error_type - else: - error_object = error_func(*args, **kwargs) + error_list = ErrorHandler.format_error(error_type, *args, actual_error=actual_error, **kwargs) - if actual_error: - error_object['code'] = actual_error - - ErrorHandler._add_context_to_errors(error_object, error_context) - ErrorHandler._update_error_with_char_pos(error_object) - return [error_object] + ErrorHandler._add_context_to_errors(error_list[0], error_context) + ErrorHandler._update_error_with_char_pos(error_list[0]) + return error_list @staticmethod def _add_context_to_errors(error_object, error_context_to_add): diff --git a/hed/errors/error_types.py b/hed/errors/error_types.py index 94c215f6..1fa221bf 100644 --- a/hed/errors/error_types.py +++ b/hed/errors/error_types.py @@ -2,6 +2,7 @@ class ErrorSeverity: + """Severity codes for errors""" ERROR = 1 WARNING = 10 diff --git a/hed/errors/schema_error_messages.py b/hed/errors/schema_error_messages.py index 7e2b269e..f2a7e4f4 100644 --- a/hed/errors/schema_error_messages.py +++ b/hed/errors/schema_error_messages.py @@ -1,4 +1,4 @@ -""" Format templates for HED error messages. """ +""" Format templates for HED schema error messages. """ from hed.errors.error_types import SchemaErrors, SchemaWarnings, ErrorSeverity, SchemaAttributeErrors from hed.errors.error_reporter import hed_error diff --git a/hed/models/base_input.py b/hed/models/base_input.py index f77278ae..2fd653d8 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -17,11 +17,6 @@ class BaseInput: TEXT_EXTENSION = ['.tsv', '.txt'] EXCEL_EXTENSION = ['.xlsx'] - FILE_EXTENSION = [*TEXT_EXTENSION, *EXCEL_EXTENSION] - STRING_INPUT = 'string' - FILE_INPUT = 'file' - TAB_DELIMITER = '\t' - COMMA_DELIMITER = ',' def __init__(self, file, file_type=None, worksheet_name=None, has_column_names=True, mapper=None, name=None, allow_blank_names=True): diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 60424be9..86b4147f 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -158,7 +158,7 @@ def _strip_value_placeholder(self, def_tag_name): def _validate_name_and_context(self, def_tag_name, error_handler): if error_handler: - context = error_handler.get_error_context_copy() + context = error_handler.error_context else: context = [] new_def_issues = [] @@ -298,7 +298,7 @@ def get_as_strings(def_dict): def_dict(DefinitionDict or dict): A dict of definitions Returns: - dict(str: str): definition name and contents + dict(str): definition name and contents """ if isinstance(def_dict, DefinitionDict): def_dict = def_dict.defs diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index fb1b824f..492272ac 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -19,6 +19,8 @@ def __init__(self, name, contents, takes_value, source_context): if contents: contents = contents.copy() contents.sort() + if contents: + contents = contents.copy() self.contents = contents self.takes_value = takes_value self.source_context = source_context diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 83bd9959..5e2281ae 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -592,6 +592,11 @@ def _get_tag_units_portion(extension_text, tag_unit_classes): return None, None, None def is_placeholder(self): + """Returns if this tag has a placeholder in it. + + Returns: + has_placeholder(bool): True if it has a placeholder + """ if "#" in self.org_tag or "#" in self._extension_value: return True return False diff --git a/hed/models/query_util.py b/hed/models/query_util.py index 4534a62e..2ee906f1 100644 --- a/hed/models/query_util.py +++ b/hed/models/query_util.py @@ -1,4 +1,4 @@ -""" Classes representing HED search results. """ +""" Classes representing HED search results and tokens. """ class SearchResult: diff --git a/hed/models/string_util.py b/hed/models/string_util.py index 73242490..2804ac12 100644 --- a/hed/models/string_util.py +++ b/hed/models/string_util.py @@ -53,7 +53,7 @@ def split_base_tags(hed_string, base_tags, remove_group=False): def split_def_tags(hed_string, def_names, remove_group=False): - """ Split a HedString object into two separate HedString objects based on the presence of wildcard tags. + """ Split a HedString object into two separate HedString objects based on the presence of def tags This does NOT handle def-expand tags currently. diff --git a/hed/schema/hed_schema_constants.py b/hed/schema/hed_schema_constants.py index 4194bfe3..ad22e374 100644 --- a/hed/schema/hed_schema_constants.py +++ b/hed/schema/hed_schema_constants.py @@ -2,7 +2,7 @@ class HedSectionKey(Enum): - """ Kegs designating specific sections in a HedSchema object. + """ Keys designating specific sections in a HedSchema object. """ # overarching category listing all tags Tags = 'tags' diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 3f23838d..2f42cca5 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -125,10 +125,6 @@ def __hash__(self): def __str__(self): return self.name - def get_known_attributes(self): - return {key: value for key, value in self.attributes.items() - if not self._unknown_attributes or key not in self._unknown_attributes} - @staticmethod def _compare_attributes_no_order(left, right): if left != right: diff --git a/hed/schema/hed_schema_section.py b/hed/schema/hed_schema_section.py index dc3c64fe..99d7b168 100644 --- a/hed/schema/hed_schema_section.py +++ b/hed/schema/hed_schema_section.py @@ -149,6 +149,7 @@ def _finalize_section(self, hed_schema): class HedSchemaUnitSection(HedSchemaSection): + """The schema section containing units.""" def _check_if_duplicate(self, name_key, new_entry): """We need to mark duplicate units(units with unitSymbol are case sensitive, while others are not.""" if not new_entry.has_attribute(HedKey.UnitSymbol): @@ -157,6 +158,7 @@ def _check_if_duplicate(self, name_key, new_entry): class HedSchemaUnitClassSection(HedSchemaSection): + """The schema section containing unit classes.""" def _check_if_duplicate(self, name_key, new_entry): """Allow adding units to existing unit classes, using a placeholder one with no attributes.""" if name_key in self and len(new_entry.attributes) == 1 \ @@ -166,7 +168,7 @@ def _check_if_duplicate(self, name_key, new_entry): class HedSchemaTagSection(HedSchemaSection): - """ A section of the schema. """ + """The schema section containing all tags.""" def __init__(self, *args, case_sensitive=False, **kwargs): super().__init__(*args, **kwargs, case_sensitive=case_sensitive) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index a053b962..16fbfcef 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -20,17 +20,7 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): - """ Check if comma separated list has valid HedTags. - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - - """ + """Check if comma separated list has valid HedTags.""" issues = [] if not tag_entry.name.endswith("/#"): issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, tag_entry.name, @@ -40,18 +30,7 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): def attribute_is_deprecated(hed_schema, tag_entry, attribute_name): - """ Check if the attribute is deprecated. - - does not check value. - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - """ + """ Check if the attribute is deprecated. does not check value.""" issues = [] # Attributes has to check properties section_key = HedSectionKey.Attributes @@ -71,17 +50,7 @@ def attribute_is_deprecated(hed_schema, tag_entry, attribute_name): # todo: This needs to be refactored, these next several functions are near identical def tag_exists_check(hed_schema, tag_entry, attribute_name): - """ Check if the list of possible tags exists in the schema. - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - - """ + """Check if the list of possible tags exists in the schema.""" issues = [] possible_tags = tag_entry.attributes.get(attribute_name, "") split_tags = possible_tags.split(",") @@ -103,6 +72,7 @@ def tag_exists_check(hed_schema, tag_entry, attribute_name): def unit_class_exists(hed_schema, tag_entry, attribute_name): + """Check if comma separated list is valid unit classes.""" issues = [] possible_unit_classes = tag_entry.attributes.get(attribute_name, "") split_tags = possible_unit_classes.split(",") @@ -124,6 +94,7 @@ def unit_class_exists(hed_schema, tag_entry, attribute_name): def value_class_exists(hed_schema, tag_entry, attribute_name): + """Check if comma separated list is valid value classes.""" issues = [] possible_value_classes = tag_entry.attributes.get(attribute_name, "") split_tags = possible_value_classes.split(",") @@ -146,6 +117,7 @@ def value_class_exists(hed_schema, tag_entry, attribute_name): def unit_exists(hed_schema, tag_entry, attribute_name): + """Check the given unit is valid, and not deprecated.""" issues = [] unit = tag_entry.attributes.get(attribute_name, "") unit_entry = tag_entry.get_derivative_unit_entry(unit) @@ -165,16 +137,7 @@ def unit_exists(hed_schema, tag_entry, attribute_name): # This is effectively unused and can never fail - The schema would catch these errors and refuse to load def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name): - """ Check if the single tag is a partnered schema tag - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this tag. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - """ + """Check if the single tag is a partnered schema tag""" issues = [] rooted_tag = tag_entry.attributes.get(attribute_name, "") if rooted_tag and rooted_tag not in hed_schema.tags: @@ -187,16 +150,7 @@ def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name): def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): - """ Check if the element has a valid deprecatedFrom attribute, and that any children have it - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this element. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - """ + """ Check if the element has a valid deprecatedFrom attribute, and that any children have it""" issues = [] deprecated_version = tag_entry.attributes.get(attribute_name, "") library_name = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) @@ -223,32 +177,23 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): def conversion_factor(hed_schema, tag_entry, attribute_name): + """Check if the conversion_factor on is valid""" issues = [] - conversion_factor = tag_entry.attributes.get(attribute_name, "1.0") + cf = tag_entry.attributes.get(attribute_name, "1.0") try: - conversion_factor = float(conversion_factor.replace("^", "e")) + cf = float(cf.replace("^", "e")) except (ValueError, AttributeError): pass - if not isinstance(conversion_factor, float) or conversion_factor <= 0.0: + if not isinstance(cf, float) or cf <= 0.0: issues += ErrorHandler.format_error(SchemaAttributeErrors.SCHEMA_CONVERSION_FACTOR_NOT_POSITIVE, tag_entry.name, - conversion_factor) + cf) return issues def allowed_characters_check(hed_schema, tag_entry, attribute_name): - """ Check allowed character has a valid value - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this attribute. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - - """ + """ Check allowed character has a valid value""" issues = [] allowed_strings = character_types @@ -263,17 +208,7 @@ def allowed_characters_check(hed_schema, tag_entry, attribute_name): def in_library_check(hed_schema, tag_entry, attribute_name): - """ Check allowed character has a valid value - - Parameters: - hed_schema (HedSchema): The schema to use for validation - tag_entry (HedSchemaEntry): The schema entry for this attribute. - attribute_name (str): The name of this attribute - - Returns: - list: A list of issues. Each issue is a dictionary. - - """ + """Check if the library attribute is a valid schema name""" issues = [] library = tag_entry.attributes.get(attribute_name, "") diff --git a/hed/schema/schema_compare.py b/hed/schema/schema_compare.py index 6f42d3a5..4a921652 100644 --- a/hed/schema/schema_compare.py +++ b/hed/schema/schema_compare.py @@ -181,8 +181,7 @@ def _sort_changes_by_severity(changes_dict): def gather_schema_changes(schema1, schema2, attribute_filter=None): - """ - Compare two schemas section by section, generated a changelog + """ Compare two schemas section by section, generating a changelog Parameters: schema1 (HedSchema): The first schema to be compared. diff --git a/hed/schema/schema_compliance.py b/hed/schema/schema_compliance.py index f3afbaac..4835d994 100644 --- a/hed/schema/schema_compliance.py +++ b/hed/schema/schema_compliance.py @@ -59,7 +59,7 @@ class SchemaValidator: HedKey.ConversionFactor: [schema_attribute_validators.conversion_factor], HedKey.AllowedCharacter: [schema_attribute_validators.allowed_characters_check], HedKey.InLibrary: [schema_attribute_validators.in_library_check] - } + } # Known attribute validators def __init__(self, hed_schema, check_for_warnings=True, error_handler=None): self.hed_schema = hed_schema From f6fbfb59ceab39f5e00b3c7b8c5ceb540c6181cf Mon Sep 17 00:00:00 2001 From: IanCa <30812436+IanCa@users.noreply.github.com> Date: Wed, 20 Mar 2024 11:21:58 -0500 Subject: [PATCH 2/4] Update schema_attribute_validators.py --- hed/schema/schema_attribute_validators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 16fbfcef..4237b6b6 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -8,7 +8,7 @@ - ``attribute_name (str)``: The name of this attribute. Returns: - - ``bool``: Description of the return value. + - ``issues (list)``: A list of issues found validating this attribute """ from hed.errors.error_types import SchemaWarnings, ValidationErrors, SchemaAttributeErrors From a74eb54976a8b8c4c38ebd097b623c353171fe17 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 20 Mar 2024 13:35:03 -0500 Subject: [PATCH 3/4] Add doc strings back --- hed/schema/schema_attribute_validators.py | 110 +++++++++++++++++++--- 1 file changed, 99 insertions(+), 11 deletions(-) diff --git a/hed/schema/schema_attribute_validators.py b/hed/schema/schema_attribute_validators.py index 4237b6b6..a48c4de0 100644 --- a/hed/schema/schema_attribute_validators.py +++ b/hed/schema/schema_attribute_validators.py @@ -20,7 +20,15 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): - """Check if comma separated list has valid HedTags.""" + """Check if comma separated list has valid HedTags. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] if not tag_entry.name.endswith("/#"): issues += ErrorHandler.format_error(SchemaWarnings.SCHEMA_NON_PLACEHOLDER_HAS_CLASS, tag_entry.name, @@ -30,7 +38,15 @@ def tag_is_placeholder_check(hed_schema, tag_entry, attribute_name): def attribute_is_deprecated(hed_schema, tag_entry, attribute_name): - """ Check if the attribute is deprecated. does not check value.""" + """ Check if the attribute is deprecated. does not check value. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] # Attributes has to check properties section_key = HedSectionKey.Attributes @@ -50,7 +66,15 @@ def attribute_is_deprecated(hed_schema, tag_entry, attribute_name): # todo: This needs to be refactored, these next several functions are near identical def tag_exists_check(hed_schema, tag_entry, attribute_name): - """Check if the list of possible tags exists in the schema.""" + """Check if the list of possible tags exists in the schema. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] possible_tags = tag_entry.attributes.get(attribute_name, "") split_tags = possible_tags.split(",") @@ -72,7 +96,15 @@ def tag_exists_check(hed_schema, tag_entry, attribute_name): def unit_class_exists(hed_schema, tag_entry, attribute_name): - """Check if comma separated list is valid unit classes.""" + """Check if comma separated list is valid unit classes. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] possible_unit_classes = tag_entry.attributes.get(attribute_name, "") split_tags = possible_unit_classes.split(",") @@ -94,7 +126,15 @@ def unit_class_exists(hed_schema, tag_entry, attribute_name): def value_class_exists(hed_schema, tag_entry, attribute_name): - """Check if comma separated list is valid value classes.""" + """Check if comma separated list is valid value classes. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] possible_value_classes = tag_entry.attributes.get(attribute_name, "") split_tags = possible_value_classes.split(",") @@ -117,7 +157,15 @@ def value_class_exists(hed_schema, tag_entry, attribute_name): def unit_exists(hed_schema, tag_entry, attribute_name): - """Check the given unit is valid, and not deprecated.""" + """Check the given unit is valid, and not deprecated. + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] unit = tag_entry.attributes.get(attribute_name, "") unit_entry = tag_entry.get_derivative_unit_entry(unit) @@ -137,7 +185,15 @@ def unit_exists(hed_schema, tag_entry, attribute_name): # This is effectively unused and can never fail - The schema would catch these errors and refuse to load def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name): - """Check if the single tag is a partnered schema tag""" + """Check if the single tag is a partnered schema tag + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] rooted_tag = tag_entry.attributes.get(attribute_name, "") if rooted_tag and rooted_tag not in hed_schema.tags: @@ -150,7 +206,15 @@ def tag_exists_base_schema_check(hed_schema, tag_entry, attribute_name): def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): - """ Check if the element has a valid deprecatedFrom attribute, and that any children have it""" + """ Check if the element has a valid deprecatedFrom attribute, and that any children have it + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] deprecated_version = tag_entry.attributes.get(attribute_name, "") library_name = tag_entry.has_attribute(HedKey.InLibrary, return_value=True) @@ -177,7 +241,15 @@ def tag_is_deprecated_check(hed_schema, tag_entry, attribute_name): def conversion_factor(hed_schema, tag_entry, attribute_name): - """Check if the conversion_factor on is valid""" + """Check if the conversion_factor on is valid + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] cf = tag_entry.attributes.get(attribute_name, "1.0") try: @@ -193,7 +265,15 @@ def conversion_factor(hed_schema, tag_entry, attribute_name): def allowed_characters_check(hed_schema, tag_entry, attribute_name): - """ Check allowed character has a valid value""" + """ Check allowed character has a valid value + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] allowed_strings = character_types @@ -208,7 +288,15 @@ def allowed_characters_check(hed_schema, tag_entry, attribute_name): def in_library_check(hed_schema, tag_entry, attribute_name): - """Check if the library attribute is a valid schema name""" + """Check if the library attribute is a valid schema name + + Parameters: + hed_schema (HedSchema): The schema to use for validation + tag_entry (HedSchemaEntry): The schema entry for this tag. + attribute_name (str): The name of this attribute + Returns: + issues(list): A list of issues from validating this attribute. + """ issues = [] library = tag_entry.attributes.get(attribute_name, "") From c93eb00c2c5dd235996798309dcfa412a12bd1d8 Mon Sep 17 00:00:00 2001 From: IanCa Date: Wed, 20 Mar 2024 14:38:37 -0500 Subject: [PATCH 4/4] Comment out owl related code pending new version --- hed/schema/hed_schema.py | 94 ++-- hed/schema/hed_schema_io.py | 25 +- hed/schema/schema_io/owl2schema.py | 570 +++++++++++------------ hed/schema/schema_io/owl_constants.py | 94 ++-- hed/schema/schema_io/schema2owl.py | 626 +++++++++++++------------- tests/schema/test_hed_schema_io.py | 154 +++---- 6 files changed, 779 insertions(+), 784 deletions(-) diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index dbba8046..19732d21 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -6,8 +6,8 @@ from hed.schema.schema_io import schema_util from hed.schema.schema_io.schema2xml import Schema2XML from hed.schema.schema_io.schema2wiki import Schema2Wiki -from hed.schema.schema_io.schema2owl import Schema2Owl -from hed.schema.schema_io.owl_constants import ext_to_format +# from hed.schema.schema_io.schema2owl import Schema2Owl +# from hed.schema.schema_io.owl_constants import ext_to_format from hed.schema.hed_schema_section import (HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection, HedSchemaUnitSection) from hed.errors import ErrorHandler @@ -246,25 +246,25 @@ def get_as_mediawiki_string(self, save_merged=False): output_strings = Schema2Wiki.process_schema(self, save_merged) return '\n'.join(output_strings) - def get_as_owl_string(self, save_merged=False, file_format="owl"): - """ Return the schema to a mediawiki string. - - Parameters: - save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. - If it is not a "withStandard" schema, this setting has no effect. - file_format(str or None): Override format from filename extension. - Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld"). - Other values should work, but aren't as fully supported. - Returns: - str: The schema as a string in mediawiki format. - - :raises rdflib.plugin.PluginException: - - Invalid format of file_format. Make sure you use a supported RDF format. - """ - if file_format == "owl": - file_format = "xml" - rdf_data = Schema2Owl.process_schema(self, save_merged) - return rdf_data.serialize(format=file_format) + # def get_as_owl_string(self, save_merged=False, file_format="owl"): + # """ Return the schema to a mediawiki string. + # + # Parameters: + # save_merged (bool): If True, this will save the schema as a merged schema if it is a "withStandard" schema. + # If it is not a "withStandard" schema, this setting has no effect. + # file_format(str or None): Override format from filename extension. + # Accepts any value rdflib accepts(We fully support "turtle", "xml"("owl" also accepted) and "json-ld"). + # Other values should work, but aren't as fully supported. + # Returns: + # str: The schema as a string in mediawiki format. + # + # :raises rdflib.plugin.PluginException: + # - Invalid format of file_format. Make sure you use a supported RDF format. + # """ + # if file_format == "owl": + # file_format = "xml" + # rdf_data = Schema2Owl.process_schema(self, save_merged) + # return rdf_data.serialize(format=file_format) def get_as_xml_string(self, save_merged=True): """ Return the schema to an XML string. @@ -298,32 +298,32 @@ def save_as_mediawiki(self, filename, save_merged=False): opened_file.write(string) opened_file.write('\n') - def save_as_owl(self, filename, save_merged=False, file_format=None): - """ Save as json to a file. - - filename: str - Save the file here - save_merged: bool - If True, this will save the schema as a merged schema if it is a "withStandard" schema. - If it is not a "withStandard" schema, this setting has no effect. - file_format(str or None): Required for owl formatted files other than the following: - .ttl: turtle - .owl: xml - .json-ld: json-ld - - :raises OSError: - - File cannot be saved for some reason - - :raises rdflib.plugin.PluginException: - - Invalid format of file_format. Make sure you use a supported RDF format. - """ - ext = os.path.splitext(filename.lower())[1] - if ext in ext_to_format and file_format is None: - file_format = ext_to_format[ext] - if file_format == "owl": - file_format = "xml" - rdf_data = Schema2Owl.process_schema(self, save_merged) - rdf_data.serialize(filename, format=file_format) + # def save_as_owl(self, filename, save_merged=False, file_format=None): + # """ Save as json to a file. + # + # filename: str + # Save the file here + # save_merged: bool + # If True, this will save the schema as a merged schema if it is a "withStandard" schema. + # If it is not a "withStandard" schema, this setting has no effect. + # file_format(str or None): Required for owl formatted files other than the following: + # .ttl: turtle + # .owl: xml + # .json-ld: json-ld + # + # :raises OSError: + # - File cannot be saved for some reason + # + # :raises rdflib.plugin.PluginException: + # - Invalid format of file_format. Make sure you use a supported RDF format. + # """ + # ext = os.path.splitext(filename.lower())[1] + # if ext in ext_to_format and file_format is None: + # file_format = ext_to_format[ext] + # if file_format == "owl": + # file_format = "xml" + # rdf_data = Schema2Owl.process_schema(self, save_merged) + # rdf_data.serialize(filename, format=file_format) def save_as_xml(self, filename, save_merged=True): """ Save as XML to a file. diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index b4691022..fe26aa11 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -5,7 +5,7 @@ from hed.schema.schema_io.xml2schema import SchemaLoaderXML from hed.schema.schema_io.wiki2schema import SchemaLoaderWiki -from hed.schema.schema_io.owl2schema import SchemaLoaderOWL +# from hed.schema.schema_io.owl2schema import SchemaLoaderOWL from hed.schema import hed_cache from hed.errors.exceptions import HedFileError, HedExceptions @@ -13,7 +13,7 @@ from hed.schema.hed_schema_group import HedSchemaGroup from hed.schema.schema_validation_util import validate_version_string from collections import defaultdict -from hed.schema.schema_io.owl_constants import ext_to_format +# from hed.schema.schema_io.owl_constants import ext_to_format from urllib.error import URLError MAX_MEMORY_CACHE = 40 @@ -26,7 +26,6 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche schema_string (str): An XML, mediawiki or OWL, file as a single long string schema_format (str): The schema format of the source schema string. Allowed normal values: .mediawiki, .xml - Allowed owl values: xml, owl, pretty-xml, turtle (or any other value rdflib supports) schema_namespace (str, None): The name_prefix all tags in this schema will accept. schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. @@ -54,9 +53,9 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche hed_schema = SchemaLoaderXML.load(schema_as_string=schema_string, schema=schema, name=name) elif schema_format.endswith(".mediawiki"): hed_schema = SchemaLoaderWiki.load(schema_as_string=schema_string, schema=schema, name=name) - elif schema_format: - hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, - name=name) + # elif schema_format: + # hed_schema = SchemaLoaderOWL.load(schema_as_string=schema_string, schema=schema, file_format=schema_format, + # name=name) else: raise HedFileError(HedExceptions.INVALID_EXTENSION, f"Unknown schema extension {schema_format}", filename=name) @@ -65,7 +64,7 @@ def from_string(schema_string, schema_format=".xml", schema_namespace=None, sche return hed_schema -def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, name=None): +def load_schema(hed_path, schema_namespace=None, schema=None, name=None): """ Load a schema from the given file or URL path. Parameters: @@ -73,10 +72,6 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, schema_namespace (str or None): The name_prefix all tags in this schema will accept. schema(HedSchema or None): A hed schema to merge this new file into It must be a with-standard schema with the same value. - file_format(str or None): Required for owl formatted files other than the following: - .ttl: turtle - .owl: xml - .json-ld: json-ld name(str or None): User supplied identifier for this schema Returns: @@ -100,10 +95,10 @@ def load_schema(hed_path, schema_namespace=None, schema=None, file_format=None, except URLError as e: raise HedFileError(HedExceptions.URL_ERROR, str(e), hed_path) from e hed_schema = from_string(file_as_string, schema_format=os.path.splitext(hed_path.lower())[1], name=name) - elif ext in ext_to_format: - hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name) - elif file_format: - hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name) + # elif ext in ext_to_format: + # hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=ext_to_format[ext], name=name) + # elif file_format: + # hed_schema = SchemaLoaderOWL.load(hed_path, schema=schema, file_format=file_format, name=name) elif hed_path.lower().endswith(".xml"): hed_schema = SchemaLoaderXML.load(hed_path, schema=schema, name=name) elif hed_path.lower().endswith(".mediawiki"): diff --git a/hed/schema/schema_io/owl2schema.py b/hed/schema/schema_io/owl2schema.py index 09f3ccd4..da8970ce 100644 --- a/hed/schema/schema_io/owl2schema.py +++ b/hed/schema/schema_io/owl2schema.py @@ -1,285 +1,285 @@ -""" -This module is used to create a HedSchema object from an OWL file or graph. -""" - - -from hed.errors.exceptions import HedFileError, HedExceptions -from hed.schema.hed_schema_constants import HedSectionKey, HedKey -from hed.schema import schema_validation_util -from .base2schema import SchemaLoader -import rdflib -from rdflib.exceptions import ParserError -from rdflib import RDF, RDFS, URIRef, OWL -from collections import defaultdict - -from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM - - -class SchemaLoaderOWL(SchemaLoader): - """ Loads XML schemas from filenames or strings. - - Expected usage is SchemaLoaderXML.load(filename) - - SchemaLoaderXML(filename) will load just the header_attributes - """ - def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): - if schema_as_string and not file_format: - raise HedFileError(HedExceptions.BAD_PARAMETERS, - "Must pass a file_format if loading owl schema as a string.", - name) - super().__init__(filename, schema_as_string, schema, file_format, name) - - self._schema.source_format = ".owl" - self.graph = None - # When loading, this stores rooted tag name -> full root path pairs - self._rooted_cache = {} - - def _open_file(self): - """Parses a Turtle/owl/etc file and returns the RDF graph.""" - - graph = rdflib.Graph() - try: - if self.filename: - graph.parse(self.filename, format=self.file_format) - else: - graph.parse(data=self.schema_as_string, format=self.file_format) - except FileNotFoundError as fnf_error: - raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(fnf_error), self.name) - except ParserError as parse_error: - raise HedFileError(HedExceptions.CANNOT_PARSE_RDF, str(parse_error), self.name) - - return graph - - def _read_prologue(self): - """Reads the Prologue section from the ontology.""" - prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False) - return str(prologue) if prologue else "" - - def _read_epilogue(self): - """Reads the Epilogue section from the ontology.""" - epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False) - return str(epilogue) if epilogue else "" - - def _get_header_attributes(self, graph): - """Parses header attributes from an RDF graph into a dictionary.""" - header_attributes = {} - for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)): - label = graph.value(s, RDFS.label) - if label: - header_attribute = graph.value(s, HED.HeaderAttribute) - header_attributes[str(label)] = str(header_attribute) if header_attribute else None - return header_attributes - - def _parse_data(self): - self.graph = self.input_data - self.graph.bind("hed", HED) - self.graph.bind("hedt", HEDT) - self.graph.bind("hedu", HEDU) - self.graph.bind("hedum", HEDUM) - - self._schema.epilogue = self._read_epilogue() - self._schema.prologue = self._read_prologue() - self._get_header_attributes(self.graph) - self._read_properties() - self._read_attributes() - self._read_units() - self._read_section(HedSectionKey.ValueClasses, HED.HedValueClass) - self._read_section(HedSectionKey.UnitModifiers, HED.HedUnitModifier) - self._read_tags() - - def get_local_names_from_uris(parent_chain, tag_uri): - """ - Extracts local names from URIs using RDFlib's n3() method. - """ - full_names = [] - for uri in parent_chain + [tag_uri]: - # Serialize the URI into N3 format and extract the local name - name = uri.n3(namespace_manager=HED.namespace_manager).split(':')[-1] - full_names.append(name) - - return full_names - - def sort_classes_by_hierarchy(self, classes): - """ - Sorts all tags based on assembled full name - - Returns: - list of tuples. - Left Tag URI, right side is parent labels(not including self) - """ - parent_chains = [] - full_tag_names = [] - for tag_uri in classes: - parent_chain = self._get_parent_chain(tag_uri) - parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] - # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] - full_tag_names.append("/".join(parent_chain)) - parent_chains.append((tag_uri, parent_chain[:-1])) - - # Sort parent_chains by full_tag_names. - _, parent_chains = zip(*sorted(zip(full_tag_names, parent_chains))) - - return parent_chains - - def _get_parent_chain(self, cls): - """ Recursively builds the parent chain for a given class. """ - parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) - if parent is None: - return [] - return self._get_parent_chain(parent) + [parent] - - def _parse_uri(self, uri, key_class, name=None): - if name: - label = name - else: - label = self.graph.value(subject=uri, predicate=RDFS.label) - if not label: - raise ValueError(f"Empty label value found in owl file in uri {uri}") - label = str(label) - - tag_entry = self._schema._create_tag_entry(label, key_class) - - description = self.graph.value(subject=uri, predicate=RDFS.comment) - if description: - tag_entry.description = str(description) - - section = self._schema._sections[key_class] - valid_attributes = section.valid_attributes - - new_values = defaultdict(list) - for predicate, obj in self.graph.predicate_objects(subject=uri): - # Convert predicate URI to a readable string, assuming it's in a known namespace - attr_name = predicate.n3(self.graph.namespace_manager).split(':')[1] - - if attr_name in valid_attributes: - if isinstance(obj, URIRef): - attr_value = obj.n3(self.graph.namespace_manager).split(':')[1] - else: - attr_value = str(obj) - - new_values[attr_name].append(attr_value) - - for name, value in new_values.items(): - value = ",".join(value) - if value == "true": - value = True - tag_entry._set_attribute_value(name, value) - - return tag_entry - - def _get_classes_with_subproperty(self, subproperty_uri, base_type): - """Iterates over all classes that have a specified rdfs:subPropertyOf.""" - classes = set() - for s in self.graph.subjects(RDF.type, base_type): - if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: - classes.add(s) - return classes - - def _get_all_subclasses(self, base_type): - """ - Recursively finds all subclasses of the given base_type. - """ - subclasses = set() - for subclass in self.graph.subjects(RDFS.subClassOf, base_type): - subclasses.add(subclass) - subclasses.update(self._get_all_subclasses(subclass)) - return subclasses - - def _get_classes(self, base_type): - """ - Retrieves all instances of the given base_type, including instances of its subclasses. - """ - classes = set() - # Add instances of the base type - for s in self.graph.subjects(RDF.type, base_type): - classes.add(s) - # Add instances of all subclasses - for subclass in self._get_all_subclasses(base_type): - for s in self.graph.subjects(RDF.type, subclass): - classes.add(s) - return classes - - def _read_properties(self): - key_class = HedSectionKey.Properties - self._schema._initialize_attributes(key_class) - prop_uris = self._get_classes_with_subproperty(HED.schemaProperty, OWL.AnnotationProperty) - for uri in prop_uris: - new_entry = self._parse_uri(uri, key_class) - self._add_to_dict(new_entry, key_class) - - def _read_attributes(self): - key_class = HedSectionKey.Attributes - self._schema._initialize_attributes(key_class) - prop_uris = self._get_classes_with_subproperty(HED.schemaAttributeDatatypeProperty, OWL.DatatypeProperty) - prop_uris.update(self._get_classes_with_subproperty(HED.schemaAttributeObjectProperty, OWL.ObjectProperty)) - - for uri in prop_uris: - new_entry = self._parse_uri(uri, key_class) - self._add_to_dict(new_entry, key_class) - - def _read_section(self, key_class, node_uri): - self._schema._initialize_attributes(key_class) - classes = self._get_classes(node_uri) - for uri in classes: - new_entry = self._parse_uri(uri, key_class) - self._add_to_dict(new_entry, key_class) - - def _read_units(self): - self._schema._initialize_attributes(HedSectionKey.UnitClasses) - self._schema._initialize_attributes(HedSectionKey.Units) - key_class = HedSectionKey.UnitClasses - classes = self._get_classes(HED.HedUnitClass) - unit_classes = {} - for uri in classes: - new_entry = self._parse_uri(uri, key_class) - self._add_to_dict(new_entry, key_class) - unit_classes[uri] = new_entry - - key_class = HedSectionKey.Units - units = self._get_classes(HED.HedUnit) - for uri in units: - new_entry = self._parse_uri(uri, key_class) - self._add_to_dict(new_entry, key_class) - unit_class_uri = self.graph.value(subject=uri, predicate=HED.unitClass) - class_entry = unit_classes.get(unit_class_uri) - class_entry.add_unit(new_entry) - - def _add_tag_internal(self, uri, parent_tags): - tag_name = self.graph.value(uri, RDFS.label) - if not tag_name: - raise ValueError(f"No label for uri {uri}") - tag_name = str(tag_name) - parents_and_child = parent_tags + [tag_name] - if parent_tags and parents_and_child[0] in self._rooted_cache: - full_tag = "/".join([self._rooted_cache[parents_and_child[0]]] + parents_and_child[1:]) - else: - full_tag = "/".join(parents_and_child) - - tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) - - rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged) - if rooted_entry: - loading_from_chain = rooted_entry.name + "/" + tag_entry.short_tag_name - loading_from_chain_short = tag_entry.short_tag_name - self._rooted_cache[tag_entry.short_tag_name] = loading_from_chain - full_tag = full_tag.replace(loading_from_chain_short, loading_from_chain) - tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) - - self._add_to_dict(tag_entry, HedSectionKey.Tags) - - def _read_tags(self): - """Populates a dictionary of dictionaries associated with tags and their attributes.""" - classes = self._get_classes(HED.HedTag) - classes.update(self._get_classes(HED.HedPlaceholder)) - sorted_classes = self.sort_classes_by_hierarchy(classes) - self._schema._initialize_attributes(HedSectionKey.Tags) - for uri, parents in sorted_classes: - self._add_tag_internal(uri, parents) - - def _add_to_dict(self, entry, key_class): - if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: - raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, - "Library tag in unmerged schema has InLibrary attribute", - self.name) - - return self._add_to_dict_base(entry, key_class) +# """ +# This module is used to create a HedSchema object from an OWL file or graph. +# """ +# +# +# from hed.errors.exceptions import HedFileError, HedExceptions +# from hed.schema.hed_schema_constants import HedSectionKey, HedKey +# from hed.schema import schema_validation_util +# from .base2schema import SchemaLoader +# import rdflib +# from rdflib.exceptions import ParserError +# from rdflib import RDF, RDFS, URIRef, OWL +# from collections import defaultdict +# +# from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM +# +# +# class SchemaLoaderOWL(SchemaLoader): +# """ Loads XML schemas from filenames or strings. +# +# Expected usage is SchemaLoaderXML.load(filename) +# +# SchemaLoaderXML(filename) will load just the header_attributes +# """ +# def __init__(self, filename, schema_as_string=None, schema=None, file_format=None, name=""): +# if schema_as_string and not file_format: +# raise HedFileError(HedExceptions.BAD_PARAMETERS, +# "Must pass a file_format if loading owl schema as a string.", +# name) +# super().__init__(filename, schema_as_string, schema, file_format, name) +# +# self._schema.source_format = ".owl" +# self.graph = None +# # When loading, this stores rooted tag name -> full root path pairs +# self._rooted_cache = {} +# +# def _open_file(self): +# """Parses a Turtle/owl/etc file and returns the RDF graph.""" +# +# graph = rdflib.Graph() +# try: +# if self.filename: +# graph.parse(self.filename, format=self.file_format) +# else: +# graph.parse(data=self.schema_as_string, format=self.file_format) +# except FileNotFoundError as fnf_error: +# raise HedFileError(HedExceptions.FILE_NOT_FOUND, str(fnf_error), self.name) +# except ParserError as parse_error: +# raise HedFileError(HedExceptions.CANNOT_PARSE_RDF, str(parse_error), self.name) +# +# return graph +# +# def _read_prologue(self): +# """Reads the Prologue section from the ontology.""" +# prologue = self.graph.value(subject=HED.Prologue, predicate=HED.elementValue, any=False) +# return str(prologue) if prologue else "" +# +# def _read_epilogue(self): +# """Reads the Epilogue section from the ontology.""" +# epilogue = self.graph.value(subject=HED.Epilogue, predicate=HED.elementValue, any=False) +# return str(epilogue) if epilogue else "" +# +# def _get_header_attributes(self, graph): +# """Parses header attributes from an RDF graph into a dictionary.""" +# header_attributes = {} +# for s, _, _ in graph.triples((None, RDF.type, HED.HeaderMember)): +# label = graph.value(s, RDFS.label) +# if label: +# header_attribute = graph.value(s, HED.HeaderAttribute) +# header_attributes[str(label)] = str(header_attribute) if header_attribute else None +# return header_attributes +# +# def _parse_data(self): +# self.graph = self.input_data +# self.graph.bind("hed", HED) +# self.graph.bind("hedt", HEDT) +# self.graph.bind("hedu", HEDU) +# self.graph.bind("hedum", HEDUM) +# +# self._schema.epilogue = self._read_epilogue() +# self._schema.prologue = self._read_prologue() +# self._get_header_attributes(self.graph) +# self._read_properties() +# self._read_attributes() +# self._read_units() +# self._read_section(HedSectionKey.ValueClasses, HED.HedValueClass) +# self._read_section(HedSectionKey.UnitModifiers, HED.HedUnitModifier) +# self._read_tags() +# +# def get_local_names_from_uris(parent_chain, tag_uri): +# """ +# Extracts local names from URIs using RDFlib's n3() method. +# """ +# full_names = [] +# for uri in parent_chain + [tag_uri]: +# # Serialize the URI into N3 format and extract the local name +# name = uri.n3(namespace_manager=HED.namespace_manager).split(':')[-1] +# full_names.append(name) +# +# return full_names +# +# def sort_classes_by_hierarchy(self, classes): +# """ +# Sorts all tags based on assembled full name +# +# Returns: +# list of tuples. +# Left Tag URI, right side is parent labels(not including self) +# """ +# parent_chains = [] +# full_tag_names = [] +# for tag_uri in classes: +# parent_chain = self._get_parent_chain(tag_uri) +# parent_chain = [uri.n3(namespace_manager=self.graph.namespace_manager).split(':')[-1] for uri in parent_chain + [tag_uri]] +# # parent_chain = [self.graph.value(p, RDFS.label) or p for p in parent_chain + [tag_uri]] +# full_tag_names.append("/".join(parent_chain)) +# parent_chains.append((tag_uri, parent_chain[:-1])) +# +# # Sort parent_chains by full_tag_names. +# _, parent_chains = zip(*sorted(zip(full_tag_names, parent_chains))) +# +# return parent_chains +# +# def _get_parent_chain(self, cls): +# """ Recursively builds the parent chain for a given class. """ +# parent = self.graph.value(subject=cls, predicate=HED.hasHedParent) +# if parent is None: +# return [] +# return self._get_parent_chain(parent) + [parent] +# +# def _parse_uri(self, uri, key_class, name=None): +# if name: +# label = name +# else: +# label = self.graph.value(subject=uri, predicate=RDFS.label) +# if not label: +# raise ValueError(f"Empty label value found in owl file in uri {uri}") +# label = str(label) +# +# tag_entry = self._schema._create_tag_entry(label, key_class) +# +# description = self.graph.value(subject=uri, predicate=RDFS.comment) +# if description: +# tag_entry.description = str(description) +# +# section = self._schema._sections[key_class] +# valid_attributes = section.valid_attributes +# +# new_values = defaultdict(list) +# for predicate, obj in self.graph.predicate_objects(subject=uri): +# # Convert predicate URI to a readable string, assuming it's in a known namespace +# attr_name = predicate.n3(self.graph.namespace_manager).split(':')[1] +# +# if attr_name in valid_attributes: +# if isinstance(obj, URIRef): +# attr_value = obj.n3(self.graph.namespace_manager).split(':')[1] +# else: +# attr_value = str(obj) +# +# new_values[attr_name].append(attr_value) +# +# for name, value in new_values.items(): +# value = ",".join(value) +# if value == "true": +# value = True +# tag_entry._set_attribute_value(name, value) +# +# return tag_entry +# +# def _get_classes_with_subproperty(self, subproperty_uri, base_type): +# """Iterates over all classes that have a specified rdfs:subPropertyOf.""" +# classes = set() +# for s in self.graph.subjects(RDF.type, base_type): +# if (s, RDFS.subPropertyOf, subproperty_uri) in self.graph: +# classes.add(s) +# return classes +# +# def _get_all_subclasses(self, base_type): +# """ +# Recursively finds all subclasses of the given base_type. +# """ +# subclasses = set() +# for subclass in self.graph.subjects(RDFS.subClassOf, base_type): +# subclasses.add(subclass) +# subclasses.update(self._get_all_subclasses(subclass)) +# return subclasses +# +# def _get_classes(self, base_type): +# """ +# Retrieves all instances of the given base_type, including instances of its subclasses. +# """ +# classes = set() +# # Add instances of the base type +# for s in self.graph.subjects(RDF.type, base_type): +# classes.add(s) +# # Add instances of all subclasses +# for subclass in self._get_all_subclasses(base_type): +# for s in self.graph.subjects(RDF.type, subclass): +# classes.add(s) +# return classes +# +# def _read_properties(self): +# key_class = HedSectionKey.Properties +# self._schema._initialize_attributes(key_class) +# prop_uris = self._get_classes_with_subproperty(HED.schemaProperty, OWL.AnnotationProperty) +# for uri in prop_uris: +# new_entry = self._parse_uri(uri, key_class) +# self._add_to_dict(new_entry, key_class) +# +# def _read_attributes(self): +# key_class = HedSectionKey.Attributes +# self._schema._initialize_attributes(key_class) +# prop_uris = self._get_classes_with_subproperty(HED.schemaAttributeDatatypeProperty, OWL.DatatypeProperty) +# prop_uris.update(self._get_classes_with_subproperty(HED.schemaAttributeObjectProperty, OWL.ObjectProperty)) +# +# for uri in prop_uris: +# new_entry = self._parse_uri(uri, key_class) +# self._add_to_dict(new_entry, key_class) +# +# def _read_section(self, key_class, node_uri): +# self._schema._initialize_attributes(key_class) +# classes = self._get_classes(node_uri) +# for uri in classes: +# new_entry = self._parse_uri(uri, key_class) +# self._add_to_dict(new_entry, key_class) +# +# def _read_units(self): +# self._schema._initialize_attributes(HedSectionKey.UnitClasses) +# self._schema._initialize_attributes(HedSectionKey.Units) +# key_class = HedSectionKey.UnitClasses +# classes = self._get_classes(HED.HedUnitClass) +# unit_classes = {} +# for uri in classes: +# new_entry = self._parse_uri(uri, key_class) +# self._add_to_dict(new_entry, key_class) +# unit_classes[uri] = new_entry +# +# key_class = HedSectionKey.Units +# units = self._get_classes(HED.HedUnit) +# for uri in units: +# new_entry = self._parse_uri(uri, key_class) +# self._add_to_dict(new_entry, key_class) +# unit_class_uri = self.graph.value(subject=uri, predicate=HED.unitClass) +# class_entry = unit_classes.get(unit_class_uri) +# class_entry.add_unit(new_entry) +# +# def _add_tag_internal(self, uri, parent_tags): +# tag_name = self.graph.value(uri, RDFS.label) +# if not tag_name: +# raise ValueError(f"No label for uri {uri}") +# tag_name = str(tag_name) +# parents_and_child = parent_tags + [tag_name] +# if parent_tags and parents_and_child[0] in self._rooted_cache: +# full_tag = "/".join([self._rooted_cache[parents_and_child[0]]] + parents_and_child[1:]) +# else: +# full_tag = "/".join(parents_and_child) +# +# tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) +# +# rooted_entry = schema_validation_util.find_rooted_entry(tag_entry, self._schema, self._loading_merged) +# if rooted_entry: +# loading_from_chain = rooted_entry.name + "/" + tag_entry.short_tag_name +# loading_from_chain_short = tag_entry.short_tag_name +# self._rooted_cache[tag_entry.short_tag_name] = loading_from_chain +# full_tag = full_tag.replace(loading_from_chain_short, loading_from_chain) +# tag_entry = self._parse_uri(uri, HedSectionKey.Tags, full_tag) +# +# self._add_to_dict(tag_entry, HedSectionKey.Tags) +# +# def _read_tags(self): +# """Populates a dictionary of dictionaries associated with tags and their attributes.""" +# classes = self._get_classes(HED.HedTag) +# classes.update(self._get_classes(HED.HedPlaceholder)) +# sorted_classes = self.sort_classes_by_hierarchy(classes) +# self._schema._initialize_attributes(HedSectionKey.Tags) +# for uri, parents in sorted_classes: +# self._add_tag_internal(uri, parents) +# +# def _add_to_dict(self, entry, key_class): +# if entry.has_attribute(HedKey.InLibrary) and not self._loading_merged and not self.appending_to_schema: +# raise HedFileError(HedExceptions.IN_LIBRARY_IN_UNMERGED, +# "Library tag in unmerged schema has InLibrary attribute", +# self.name) +# +# return self._add_to_dict_base(entry, key_class) diff --git a/hed/schema/schema_io/owl_constants.py b/hed/schema/schema_io/owl_constants.py index bbca40d2..e63b95bd 100644 --- a/hed/schema/schema_io/owl_constants.py +++ b/hed/schema/schema_io/owl_constants.py @@ -1,50 +1,50 @@ -from rdflib import Namespace - -from hed.schema.hed_schema_constants import HedSectionKey - - -# Default file associations(notably owl maps to XML format, as we already use XML) -ext_to_format = { - ".ttl": "turtle", - ".owl": "xml", - ".json-ld": "json-ld" -} - -# Core schema structural types in owl -HED = Namespace("https://purl.org/hed#") -# Tags -HEDT = Namespace("https://purl.org/hed/tag#") -# Unit classes, value classes, and units -HEDU = Namespace("https://purl.org/hed/aux#") -# Unit Modifiers -HEDUM = Namespace("https://purl.org/hed/aux/unit_modifier#") - -# Some of this stuff may be commented back in later if needed - -# SECTION_ELEMENT_NAME = { -# HedSectionKey.Tags: "StartSchemaSection", -# HedSectionKey.UnitClasses: "UnitClassSection", -# HedSectionKey.Units: "UnitSection", -# HedSectionKey.UnitModifiers: "UnitModifiersSection", -# HedSectionKey.ValueClasses: "ValueClassesSection", -# HedSectionKey.Attributes: "AttributesSection", -# HedSectionKey.Properties: "PropertiesSection", +# from rdflib import Namespace +# +# from hed.schema.hed_schema_constants import HedSectionKey +# +# +# # Default file associations(notably owl maps to XML format, as we already use XML) +# ext_to_format = { +# ".ttl": "turtle", +# ".owl": "xml", +# ".json-ld": "json-ld" # } # -# SECTION_ELEMENT_TYPE = { -# HedSectionKey.Tags: "HedStartSchemaSection", -# HedSectionKey.UnitClasses: "HedUnitClassSection", -# HedSectionKey.Units: "HedUnitSection", -# HedSectionKey.UnitModifiers: "HedUnitModifiersSection", -# HedSectionKey.ValueClasses: "HedValueClassesSection", -# HedSectionKey.Attributes: "HedAttributesSection", -# HedSectionKey.Properties: "HedPropertiesSection", +# # Core schema structural types in owl +# HED = Namespace("https://purl.org/hed#") +# # Tags +# HEDT = Namespace("https://purl.org/hed/tag#") +# # Unit classes, value classes, and units +# HEDU = Namespace("https://purl.org/hed/aux#") +# # Unit Modifiers +# HEDUM = Namespace("https://purl.org/hed/aux/unit_modifier#") +# +# # Some of this stuff may be commented back in later if needed +# +# # SECTION_ELEMENT_NAME = { +# # HedSectionKey.Tags: "StartSchemaSection", +# # HedSectionKey.UnitClasses: "UnitClassSection", +# # HedSectionKey.Units: "UnitSection", +# # HedSectionKey.UnitModifiers: "UnitModifiersSection", +# # HedSectionKey.ValueClasses: "ValueClassesSection", +# # HedSectionKey.Attributes: "AttributesSection", +# # HedSectionKey.Properties: "PropertiesSection", +# # } +# # +# # SECTION_ELEMENT_TYPE = { +# # HedSectionKey.Tags: "HedStartSchemaSection", +# # HedSectionKey.UnitClasses: "HedUnitClassSection", +# # HedSectionKey.Units: "HedUnitSection", +# # HedSectionKey.UnitModifiers: "HedUnitModifiersSection", +# # HedSectionKey.ValueClasses: "HedValueClassesSection", +# # HedSectionKey.Attributes: "HedAttributesSection", +# # HedSectionKey.Properties: "HedPropertiesSection", +# # } +# +# ELEMENT_NAMES = { +# HedSectionKey.Tags: "HedTag", +# HedSectionKey.Units: "HedUnit", +# HedSectionKey.UnitClasses: "HedUnitClass", +# HedSectionKey.UnitModifiers: "HedUnitModifier", +# HedSectionKey.ValueClasses: "HedValueClass", # } - -ELEMENT_NAMES = { - HedSectionKey.Tags: "HedTag", - HedSectionKey.Units: "HedUnit", - HedSectionKey.UnitClasses: "HedUnitClass", - HedSectionKey.UnitModifiers: "HedUnitModifier", - HedSectionKey.ValueClasses: "HedValueClass", -} diff --git a/hed/schema/schema_io/schema2owl.py b/hed/schema/schema_io/schema2owl.py index b06f8ece..3b8563a2 100644 --- a/hed/schema/schema_io/schema2owl.py +++ b/hed/schema/schema_io/schema2owl.py @@ -1,313 +1,313 @@ -"""Allows output of HedSchema objects as .xml format""" - -from hed.schema.hed_schema_constants import HedSectionKey, HedKey -from hed.schema.schema_io import owl_constants -from hed.schema.schema_io.schema2base import Schema2Base -from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD - -from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM -import re - - -HED_URIS = { - None: HED, - HedSectionKey.Tags: HEDT, - HedSectionKey.UnitClasses: HEDU, - HedSectionKey.Units: HEDU, - HedSectionKey.UnitModifiers: HEDUM, - HedSectionKey.ValueClasses: HEDU, - HedSectionKey.Attributes: HED, - HedSectionKey.Properties: HED, -} - -HED_ATTR = { - "unitClass": HEDU, - "valueClass": HEDU, - "unit": HEDU, - "unitModifier": HEDUM, - "property": HED, - "suggestedTag": HEDT, - "relatedTag": HEDT, - "rooted": HEDT, -} - -float_attributes = {"conversionFactor"} - -hed_keys_with_types = { - HedKey.ExtensionAllowed: XSD["boolean"], - HedKey.Recommended: XSD["boolean"], - HedKey.Required: XSD["boolean"], - HedKey.RequireChild: XSD["boolean"], - HedKey.TagGroup: XSD["boolean"], - HedKey.TakesValue: XSD["boolean"], - HedKey.TopLevelTagGroup: XSD["boolean"], - HedKey.Unique: XSD["boolean"], - HedKey.UnitClass: HED["HedUnitClass"], - HedKey.ValueClass: HED["HedValueClass"], - HedKey.RelatedTag: HED["HedTag"], - HedKey.SuggestedTag: HED["HedTag"], - HedKey.Rooted: HED["HedTag"], - HedKey.DeprecatedFrom: XSD["string"], - HedKey.ConversionFactor: XSD["string"], - HedKey.Reserved: XSD["boolean"], - HedKey.SIUnit: XSD["boolean"], - HedKey.UnitSymbol: XSD["boolean"], - HedKey.DefaultUnits: HED["HedUnit"], - HedKey.UnitPrefix: XSD["boolean"], - HedKey.SIUnitModifier: XSD["boolean"], - HedKey.SIUnitSymbolModifier: XSD["boolean"], - HedKey.AllowedCharacter: XSD["string"], - HedKey.InLibrary: XSD["string"] -} - -object_properties = {key for key, value in hed_keys_with_types.items() if value.startswith(HED)} - - -class Schema2Owl(Schema2Base): - def __init__(self): - super().__init__() - self.owl_graph = Graph() - self.output = self.owl_graph - self.owl_graph.bind("hed", HED) - self.owl_graph.bind("hedt", HEDT) - self.owl_graph.bind("hedu", HEDU) - self.owl_graph.bind("hedum", HEDUM) - - # ========================================= - # Required baseclass function - # ========================================= - def _output_header(self, attributes, prologue): - # Create a dictionary mapping label names to property URIs - property_uris = { - "library": HED.Library, - "unmerged": HED.Unmerged, - "version": HED.Version, - "withStandard": HED.WithStandard, - "xmlns:xsi": HED.XSI, - "xsi:noNamespaceSchemaLocation": HED.XSINoNamespaceSchemaLocation - } - - for attrib_label, attrib_value in attributes.items(): - prop_uri = property_uris.get(attrib_label) - if prop_uri: - self.owl_graph.add((prop_uri, RDF.type, HED.HeaderMember)) - self.owl_graph.add((prop_uri, RDFS.label, Literal(attrib_label))) - self.owl_graph.add((prop_uri, HED.HeaderAttribute, Literal(attrib_value))) - - self.owl_graph.add((HED.Prologue, RDF.type, HED.HedElement)) - self.owl_graph.add((HED.Prologue, RDFS.label, Literal("epilogue"))) - if prologue: - self.owl_graph.add((HED.Prologue, HED["elementValue"], Literal(prologue))) - - def _output_footer(self, epilogue): - self.owl_graph.add((HED.Epilogue, RDF.type, HED.HedElement)) - self.owl_graph.add((HED.Epilogue, RDFS.label, Literal("epilogue"))) - if epilogue: - self.owl_graph.add((HED.Epilogue, HED["elementValue"], Literal(epilogue))) - - def _start_section(self, key_class): - return None - - def _end_tag_section(self): - pass - - def _write_attributes(self, entry_uri, entry): - for attribute, value in entry.attributes.items(): - is_bool = entry.attribute_has_property(attribute, "boolProperty") \ - or entry.section_key == HedSectionKey.Attributes - - if self._attribute_disallowed(attribute): - continue - - if is_bool: - self.owl_graph.add((entry_uri, HED[attribute], Literal(True))) - - elif attribute in float_attributes: - # Treat as a string for now - self.owl_graph.add((entry_uri, HED[attribute], Literal(value))) - else: - # Todo: further develop this if needed or merge into base tools - values = value.split(",") - for val2 in values: - clean_value = val2 - if attribute in HED_ATTR: - attribute_uri = HED_ATTR[attribute][clean_value] - else: - attribute_uri = Literal(clean_value) - - self.owl_graph.add((entry_uri, HED[attribute], attribute_uri)) - - def _add_entry(self, base_uri, tag_name, label, comment, parent=None, entry=None, - tag_type=HED.HedTag, unit_class_uri=None): - is_takes_value = entry.has_attribute("takesValue") - if is_takes_value: - tag_type = HED.HedPlaceholder - tag_name = entry.short_tag_name + "-Placeholder" - label = "#" - - tag_name = sanitize_for_turtle(tag_name) - uri = f"{base_uri}{tag_name}" - hed_tag_uri = URIRef(uri) - - self.owl_graph.add((hed_tag_uri, RDF.type, tag_type)) - self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) - if comment: - self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) - # Don't store the parent in unmerged rooted nodes - if parent is not None and (HedKey.Rooted not in entry.attributes or self._save_merged): - parent_uri = HEDT[parent] - self.owl_graph.add((hed_tag_uri, HED.hasHedParent, parent_uri)) - if unit_class_uri is not None: - self.owl_graph.add((hed_tag_uri, HED.unitClass, unit_class_uri)) - self._write_attributes(hed_tag_uri, entry) - return hed_tag_uri - - def _add_property(self, base_uri, name, label, comment, entry, - data_type, sub_type): - name = sanitize_for_turtle(name) - uri = f"{base_uri}{name}" - hed_tag_uri = URIRef(uri) - - self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) - self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) - self.owl_graph.add((hed_tag_uri, RDFS.range, XSD.boolean)) - self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) - self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) - self._write_attributes(hed_tag_uri, entry) - - return hed_tag_uri - - def _get_element_domains(self, entry): - domain_table = {HedKey.ValueClassProperty: "HedValueClass", - HedKey.UnitModifierProperty: "HedUnitModifier", - HedKey.UnitProperty: "HedUnit", - HedKey.ElementProperty: "HedElement", - HedKey.UnitClassProperty: "HedUnitClass", - HedKey.NodeProperty: "HedTag" - } - domains = [] - for attribute in entry.attributes: - if attribute in domain_table: - domains.append(domain_table[attribute]) - - if not domains: - domains.append(domain_table[HedKey.NodeProperty]) - - return domains - - def _add_attribute(self, base_uri, name, label, comment, entry): - domains = self._get_element_domains(entry) - name = sanitize_for_turtle(name) - uri = f"{base_uri}{name}" - hed_tag_uri = URIRef(uri) - data_type = OWL.ObjectProperty - sub_type = HED.schemaAttributeObjectProperty - if name not in object_properties: - data_type = OWL.DatatypeProperty - sub_type = HED.schemaAttributeDatatypeProperty - self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) - for domain in domains: - self.owl_graph.add((hed_tag_uri, RDFS.domain, HED[domain])) - self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) - self.owl_graph.add((hed_tag_uri, RDFS.range, hed_keys_with_types[name])) - self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) - self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) - self._write_attributes(hed_tag_uri, entry) - - return hed_tag_uri - - def _write_tag_entry(self, tag_entry, parent_node=None, level=0): - """ - Creates a tag node and adds it to the parent. - - Parameters - ---------- - tag_entry: HedTagEntry - The entry for that tag we want to write out - parent_node: Any - Unused - level: Any - Unused - - Returns - ------- - SubElement - The added node - """ - tag_name = tag_entry.short_tag_name - parent = tag_entry.parent - if parent: - parent = parent.short_tag_name - comment = tag_entry.description - return self._add_entry( - HEDT, - tag_name=tag_name, - label=tag_name, - comment=comment, - parent=parent, - entry=tag_entry - ) - - def _write_entry(self, entry, parent_node=None, include_props=True): - """ - Creates an entry node and adds it to the parent. - - Parameters: - entry(HedSchemaEntry): The entry for that tag we want to write out - parent_node(str): URI for unit class owner, if this is a unit - include_props(bool): Add the description and attributes to new node. - Returns: - str: The added URI - """ - key_class = entry.section_key - prefix = HED_URIS[key_class] - name = entry.name - comment = entry.description - if key_class == HedSectionKey.Attributes: - uri = self._add_attribute( - prefix, - name=name, - label=name, - comment=comment, - entry=entry - ) - elif key_class == HedSectionKey.Properties: - uri = self._add_property( - prefix, - name=name, - label=name, - comment=comment, - entry=entry, - data_type=OWL.AnnotationProperty, - sub_type=HED.schemaProperty - ) - else: - unit_class_uri = None - if key_class == HedSectionKey.Units: - unit_class_uri = parent_node - uri = self._add_entry( - prefix, - tag_name=name, - label=name, - comment=comment, - entry=entry, - tag_type=HED[owl_constants.ELEMENT_NAMES[key_class]], - unit_class_uri=unit_class_uri - ) - return uri - - -def sanitize_for_turtle(name): - """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. - - Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` - Replacing them with underscores - - Parameters: - name (str): The string to sanitize. - - Returns: - str: A sanitized string suitable for use as an IRIREF in Turtle. - """ - invalid_chars_pattern = r'[\x00-\x20<>"{}\|^`\\]' - return re.sub(invalid_chars_pattern, '_', name) +# """Allows output of HedSchema objects as .xml format""" +# +# from hed.schema.hed_schema_constants import HedSectionKey, HedKey +# from hed.schema.schema_io import owl_constants +# from hed.schema.schema_io.schema2base import Schema2Base +# from rdflib import Graph, RDF, RDFS, Literal, URIRef, OWL, XSD +# +# from hed.schema.schema_io.owl_constants import HED, HEDT, HEDU, HEDUM +# import re +# +# +# HED_URIS = { +# None: HED, +# HedSectionKey.Tags: HEDT, +# HedSectionKey.UnitClasses: HEDU, +# HedSectionKey.Units: HEDU, +# HedSectionKey.UnitModifiers: HEDUM, +# HedSectionKey.ValueClasses: HEDU, +# HedSectionKey.Attributes: HED, +# HedSectionKey.Properties: HED, +# } +# +# HED_ATTR = { +# "unitClass": HEDU, +# "valueClass": HEDU, +# "unit": HEDU, +# "unitModifier": HEDUM, +# "property": HED, +# "suggestedTag": HEDT, +# "relatedTag": HEDT, +# "rooted": HEDT, +# } +# +# float_attributes = {"conversionFactor"} +# +# hed_keys_with_types = { +# HedKey.ExtensionAllowed: XSD["boolean"], +# HedKey.Recommended: XSD["boolean"], +# HedKey.Required: XSD["boolean"], +# HedKey.RequireChild: XSD["boolean"], +# HedKey.TagGroup: XSD["boolean"], +# HedKey.TakesValue: XSD["boolean"], +# HedKey.TopLevelTagGroup: XSD["boolean"], +# HedKey.Unique: XSD["boolean"], +# HedKey.UnitClass: HED["HedUnitClass"], +# HedKey.ValueClass: HED["HedValueClass"], +# HedKey.RelatedTag: HED["HedTag"], +# HedKey.SuggestedTag: HED["HedTag"], +# HedKey.Rooted: HED["HedTag"], +# HedKey.DeprecatedFrom: XSD["string"], +# HedKey.ConversionFactor: XSD["string"], +# HedKey.Reserved: XSD["boolean"], +# HedKey.SIUnit: XSD["boolean"], +# HedKey.UnitSymbol: XSD["boolean"], +# HedKey.DefaultUnits: HED["HedUnit"], +# HedKey.UnitPrefix: XSD["boolean"], +# HedKey.SIUnitModifier: XSD["boolean"], +# HedKey.SIUnitSymbolModifier: XSD["boolean"], +# HedKey.AllowedCharacter: XSD["string"], +# HedKey.InLibrary: XSD["string"] +# } +# +# object_properties = {key for key, value in hed_keys_with_types.items() if value.startswith(HED)} +# +# +# class Schema2Owl(Schema2Base): +# def __init__(self): +# super().__init__() +# self.owl_graph = Graph() +# self.output = self.owl_graph +# self.owl_graph.bind("hed", HED) +# self.owl_graph.bind("hedt", HEDT) +# self.owl_graph.bind("hedu", HEDU) +# self.owl_graph.bind("hedum", HEDUM) +# +# # ========================================= +# # Required baseclass function +# # ========================================= +# def _output_header(self, attributes, prologue): +# # Create a dictionary mapping label names to property URIs +# property_uris = { +# "library": HED.Library, +# "unmerged": HED.Unmerged, +# "version": HED.Version, +# "withStandard": HED.WithStandard, +# "xmlns:xsi": HED.XSI, +# "xsi:noNamespaceSchemaLocation": HED.XSINoNamespaceSchemaLocation +# } +# +# for attrib_label, attrib_value in attributes.items(): +# prop_uri = property_uris.get(attrib_label) +# if prop_uri: +# self.owl_graph.add((prop_uri, RDF.type, HED.HeaderMember)) +# self.owl_graph.add((prop_uri, RDFS.label, Literal(attrib_label))) +# self.owl_graph.add((prop_uri, HED.HeaderAttribute, Literal(attrib_value))) +# +# self.owl_graph.add((HED.Prologue, RDF.type, HED.HedElement)) +# self.owl_graph.add((HED.Prologue, RDFS.label, Literal("epilogue"))) +# if prologue: +# self.owl_graph.add((HED.Prologue, HED["elementValue"], Literal(prologue))) +# +# def _output_footer(self, epilogue): +# self.owl_graph.add((HED.Epilogue, RDF.type, HED.HedElement)) +# self.owl_graph.add((HED.Epilogue, RDFS.label, Literal("epilogue"))) +# if epilogue: +# self.owl_graph.add((HED.Epilogue, HED["elementValue"], Literal(epilogue))) +# +# def _start_section(self, key_class): +# return None +# +# def _end_tag_section(self): +# pass +# +# def _write_attributes(self, entry_uri, entry): +# for attribute, value in entry.attributes.items(): +# is_bool = entry.attribute_has_property(attribute, "boolProperty") \ +# or entry.section_key == HedSectionKey.Attributes +# +# if self._attribute_disallowed(attribute): +# continue +# +# if is_bool: +# self.owl_graph.add((entry_uri, HED[attribute], Literal(True))) +# +# elif attribute in float_attributes: +# # Treat as a string for now +# self.owl_graph.add((entry_uri, HED[attribute], Literal(value))) +# else: +# # Todo: further develop this if needed or merge into base tools +# values = value.split(",") +# for val2 in values: +# clean_value = val2 +# if attribute in HED_ATTR: +# attribute_uri = HED_ATTR[attribute][clean_value] +# else: +# attribute_uri = Literal(clean_value) +# +# self.owl_graph.add((entry_uri, HED[attribute], attribute_uri)) +# +# def _add_entry(self, base_uri, tag_name, label, comment, parent=None, entry=None, +# tag_type=HED.HedTag, unit_class_uri=None): +# is_takes_value = entry.has_attribute("takesValue") +# if is_takes_value: +# tag_type = HED.HedPlaceholder +# tag_name = entry.short_tag_name + "-Placeholder" +# label = "#" +# +# tag_name = sanitize_for_turtle(tag_name) +# uri = f"{base_uri}{tag_name}" +# hed_tag_uri = URIRef(uri) +# +# self.owl_graph.add((hed_tag_uri, RDF.type, tag_type)) +# self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) +# if comment: +# self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) +# # Don't store the parent in unmerged rooted nodes +# if parent is not None and (HedKey.Rooted not in entry.attributes or self._save_merged): +# parent_uri = HEDT[parent] +# self.owl_graph.add((hed_tag_uri, HED.hasHedParent, parent_uri)) +# if unit_class_uri is not None: +# self.owl_graph.add((hed_tag_uri, HED.unitClass, unit_class_uri)) +# self._write_attributes(hed_tag_uri, entry) +# return hed_tag_uri +# +# def _add_property(self, base_uri, name, label, comment, entry, +# data_type, sub_type): +# name = sanitize_for_turtle(name) +# uri = f"{base_uri}{name}" +# hed_tag_uri = URIRef(uri) +# +# self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) +# self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) +# self.owl_graph.add((hed_tag_uri, RDFS.range, XSD.boolean)) +# self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) +# self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) +# self._write_attributes(hed_tag_uri, entry) +# +# return hed_tag_uri +# +# def _get_element_domains(self, entry): +# domain_table = {HedKey.ValueClassProperty: "HedValueClass", +# HedKey.UnitModifierProperty: "HedUnitModifier", +# HedKey.UnitProperty: "HedUnit", +# HedKey.ElementProperty: "HedElement", +# HedKey.UnitClassProperty: "HedUnitClass", +# HedKey.NodeProperty: "HedTag" +# } +# domains = [] +# for attribute in entry.attributes: +# if attribute in domain_table: +# domains.append(domain_table[attribute]) +# +# if not domains: +# domains.append(domain_table[HedKey.NodeProperty]) +# +# return domains +# +# def _add_attribute(self, base_uri, name, label, comment, entry): +# domains = self._get_element_domains(entry) +# name = sanitize_for_turtle(name) +# uri = f"{base_uri}{name}" +# hed_tag_uri = URIRef(uri) +# data_type = OWL.ObjectProperty +# sub_type = HED.schemaAttributeObjectProperty +# if name not in object_properties: +# data_type = OWL.DatatypeProperty +# sub_type = HED.schemaAttributeDatatypeProperty +# self.owl_graph.add((hed_tag_uri, RDF.type, data_type)) +# for domain in domains: +# self.owl_graph.add((hed_tag_uri, RDFS.domain, HED[domain])) +# self.owl_graph.add((hed_tag_uri, RDFS.subPropertyOf, sub_type)) +# self.owl_graph.add((hed_tag_uri, RDFS.range, hed_keys_with_types[name])) +# self.owl_graph.add((hed_tag_uri, RDFS.label, Literal(label))) +# self.owl_graph.add((hed_tag_uri, RDFS.comment, Literal(comment))) +# self._write_attributes(hed_tag_uri, entry) +# +# return hed_tag_uri +# +# def _write_tag_entry(self, tag_entry, parent_node=None, level=0): +# """ +# Creates a tag node and adds it to the parent. +# +# Parameters +# ---------- +# tag_entry: HedTagEntry +# The entry for that tag we want to write out +# parent_node: Any +# Unused +# level: Any +# Unused +# +# Returns +# ------- +# SubElement +# The added node +# """ +# tag_name = tag_entry.short_tag_name +# parent = tag_entry.parent +# if parent: +# parent = parent.short_tag_name +# comment = tag_entry.description +# return self._add_entry( +# HEDT, +# tag_name=tag_name, +# label=tag_name, +# comment=comment, +# parent=parent, +# entry=tag_entry +# ) +# +# def _write_entry(self, entry, parent_node=None, include_props=True): +# """ +# Creates an entry node and adds it to the parent. +# +# Parameters: +# entry(HedSchemaEntry): The entry for that tag we want to write out +# parent_node(str): URI for unit class owner, if this is a unit +# include_props(bool): Add the description and attributes to new node. +# Returns: +# str: The added URI +# """ +# key_class = entry.section_key +# prefix = HED_URIS[key_class] +# name = entry.name +# comment = entry.description +# if key_class == HedSectionKey.Attributes: +# uri = self._add_attribute( +# prefix, +# name=name, +# label=name, +# comment=comment, +# entry=entry +# ) +# elif key_class == HedSectionKey.Properties: +# uri = self._add_property( +# prefix, +# name=name, +# label=name, +# comment=comment, +# entry=entry, +# data_type=OWL.AnnotationProperty, +# sub_type=HED.schemaProperty +# ) +# else: +# unit_class_uri = None +# if key_class == HedSectionKey.Units: +# unit_class_uri = parent_node +# uri = self._add_entry( +# prefix, +# tag_name=name, +# label=name, +# comment=comment, +# entry=entry, +# tag_type=HED[owl_constants.ELEMENT_NAMES[key_class]], +# unit_class_uri=unit_class_uri +# ) +# return uri +# +# +# def sanitize_for_turtle(name): +# """ Sanitizes a string to be a valid IRIREF in Turtle, based on the SPARQL grammar. +# +# Excludes: `control characters, space, <, >, double quote, {, }, |, ^, backtick, and backslash.` +# Replacing them with underscores +# +# Parameters: +# name (str): The string to sanitize. +# +# Returns: +# str: A sanitized string suitable for use as an IRIREF in Turtle. +# """ +# invalid_chars_pattern = r'[\x00-\x20<>"{}\|^`\\]' +# return re.sub(invalid_chars_pattern, '_', name) diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index 7e69a3f6..ade99fab 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -578,80 +578,80 @@ def test_triple_prefixes(self): {"test": "test:score,testlib", "ol": "ol:otherlib", "abc": "abc:anotherlib"}) -class TestOwlBase(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.base_schema = schema.load_schema_version("8.2.0") - - @with_temp_file(".owl") - def test_schema2xml(self, filename): - self.base_schema.save_as_owl(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - self.base_schema.save_as_owl(filename, save_merged=True) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - @with_temp_file(".ttl") - def test_schema2turtle(self, filename): - self.base_schema.save_as_owl(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - self.base_schema.save_as_owl(filename, save_merged=True) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - @with_temp_file(".json-ld") - def test_schema2jsonld(self, filename): - self.base_schema.save_as_owl(filename) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - self.base_schema.save_as_owl(filename, save_merged=True) - loaded_schema = schema.load_schema(filename) - - self.assertEqual(loaded_schema, self.base_schema) - - def test_schema2owlstring(self): - owl_string = self.base_schema.get_as_owl_string(file_format="turtle") - loaded_schema = schema.from_string(owl_string, schema_format="turtle") - - self.assertEqual(loaded_schema, self.base_schema) - - owl_string = self.base_schema.get_as_owl_string(save_merged=True, file_format="turtle") - loaded_schema = schema.from_string(owl_string, schema_format="turtle") - - self.assertEqual(loaded_schema, self.base_schema) - - def test_schema2bad_filename(self): - with self.assertRaises(OSError): - self.base_schema.save_as_owl("", file_format="xml") - with self.assertRaises(OSError): - self.base_schema.save_as_owl("/////////", file_format="xml") - - def test_schema2bad_filename_rdf_format(self): - with self.assertRaises(rdflib.plugin.PluginException): - self.base_schema.save_as_owl("valid_filename.invalid_extension") - with self.assertRaises(rdflib.plugin.PluginException): - self.base_schema.save_as_owl("") - with self.assertRaises(rdflib.plugin.PluginException): - self.base_schema.save_as_owl("", file_format="unknown") - - -class TestOwlLibRooted(TestOwlBase): - @classmethod - def setUpClass(cls): - cls.base_schema = schema.load_schema_version("testlib_2.0.0") - - -class TestOwlLib(TestOwlBase): - @classmethod - def setUpClass(cls): - cls.base_schema = schema.load_schema_version("score_1.1.0") +# class TestOwlBase(unittest.TestCase): +# @classmethod +# def setUpClass(cls): +# cls.base_schema = schema.load_schema_version("8.2.0") +# +# @with_temp_file(".owl") +# def test_schema2xml(self, filename): +# self.base_schema.save_as_owl(filename) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# self.base_schema.save_as_owl(filename, save_merged=True) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# @with_temp_file(".ttl") +# def test_schema2turtle(self, filename): +# self.base_schema.save_as_owl(filename) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# self.base_schema.save_as_owl(filename, save_merged=True) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# @with_temp_file(".json-ld") +# def test_schema2jsonld(self, filename): +# self.base_schema.save_as_owl(filename) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# self.base_schema.save_as_owl(filename, save_merged=True) +# loaded_schema = schema.load_schema(filename) +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# def test_schema2owlstring(self): +# owl_string = self.base_schema.get_as_owl_string(file_format="turtle") +# loaded_schema = schema.from_string(owl_string, schema_format="turtle") +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# owl_string = self.base_schema.get_as_owl_string(save_merged=True, file_format="turtle") +# loaded_schema = schema.from_string(owl_string, schema_format="turtle") +# +# self.assertEqual(loaded_schema, self.base_schema) +# +# def test_schema2bad_filename(self): +# with self.assertRaises(OSError): +# self.base_schema.save_as_owl("", file_format="xml") +# with self.assertRaises(OSError): +# self.base_schema.save_as_owl("/////////", file_format="xml") +# +# def test_schema2bad_filename_rdf_format(self): +# with self.assertRaises(rdflib.plugin.PluginException): +# self.base_schema.save_as_owl("valid_filename.invalid_extension") +# with self.assertRaises(rdflib.plugin.PluginException): +# self.base_schema.save_as_owl("") +# with self.assertRaises(rdflib.plugin.PluginException): +# self.base_schema.save_as_owl("", file_format="unknown") +# +# +# class TestOwlLibRooted(TestOwlBase): +# @classmethod +# def setUpClass(cls): +# cls.base_schema = schema.load_schema_version("testlib_2.0.0") +# +# +# class TestOwlLib(TestOwlBase): +# @classmethod +# def setUpClass(cls): +# cls.base_schema = schema.load_schema_version("score_1.1.0")