diff --git a/hed/errors/error_reporter.py b/hed/errors/error_reporter.py index 4f8ba17f1..2a8d0353c 100644 --- a/hed/errors/error_reporter.py +++ b/hed/errors/error_reporter.py @@ -22,7 +22,6 @@ ErrorContext.ROW, ErrorContext.COLUMN, ErrorContext.LINE, - # ErrorContext.HED_STRING, # temporarily disable hed string sort(maybe perm, not sure it's needed) ErrorContext.SCHEMA_SECTION, ErrorContext.SCHEMA_TAG, ErrorContext.SCHEMA_ATTRIBUTE, @@ -33,10 +32,6 @@ ErrorContext.ROW ] -hed_string_sort_list = [ - ErrorContext.HED_STRING -] - def _register_error_function(error_type, wrapper_func): if error_type in error_functions: raise KeyError(f"{error_type} defined more than once.") @@ -191,11 +186,8 @@ def push_error_context(self, context_type, context): """ if context is None: - from hed import HedString if context_type in int_sort_list: context = 0 - elif context_type in hed_string_sort_list: - context = HedString("") else: context = "" self.error_context.append((context_type, context)) @@ -430,8 +422,6 @@ def _get_keys(d): for key in default_sort_list: if key in int_sort_list: result.append(d.get(key, -1)) - elif key in hed_string_sort_list: - result.append(d.get(key, HedString(""))) else: result.append(d.get(key, "")) return tuple(result) diff --git a/hed/models/__init__.py b/hed/models/__init__.py index 73ac61deb..f2f1a600f 100644 --- a/hed/models/__init__.py +++ b/hed/models/__init__.py @@ -9,7 +9,6 @@ from .hed_group import HedGroup from .spreadsheet_input import SpreadsheetInput from .hed_string import HedString -from .hed_string_group import HedStringGroup from .hed_tag import HedTag from .sidecar import Sidecar from .tabular_input import TabularInput diff --git a/hed/models/base_input.py b/hed/models/base_input.py index 852d7bd6f..745d40811 100644 --- a/hed/models/base_input.py +++ b/hed/models/base_input.py @@ -493,7 +493,7 @@ def combine_dataframe(dataframe): ) return dataframe - def get_def_dict(self, hed_schema=None, extra_def_dicts=None): + def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this file Note: Baseclass implementation returns just extra_def_dicts. diff --git a/hed/models/column_mapper.py b/hed/models/column_mapper.py index fedac6d8f..1321e9d6d 100644 --- a/hed/models/column_mapper.py +++ b/hed/models/column_mapper.py @@ -378,11 +378,11 @@ def _finalize_mapping(self): def _remove_from_list(list_to_alter, to_remove): return [item for item in list_to_alter if item not in to_remove] - def get_def_dict(self, hed_schema=None, extra_def_dicts=None): + def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Return def dicts from every column description. Parameters: - hed_schema (Schema or None): A HED schema object to use for extracting definitions. + hed_schema (Schema): A HED schema object to use for extracting definitions. extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list. Returns: diff --git a/hed/models/def_expand_gather.py b/hed/models/def_expand_gather.py index 380079a42..662ec2e54 100644 --- a/hed/models/def_expand_gather.py +++ b/hed/models/def_expand_gather.py @@ -201,7 +201,4 @@ def get_ambiguous_group(ambiguous_def): Returns: HedGroup: the ambiguous definition with known placeholders filled in """ - if not ambiguous_def: - # mostly to not crash, this shouldn't happen. - return HedString("") return ambiguous_def.get_group() diff --git a/hed/models/definition_dict.py b/hed/models/definition_dict.py index 0fa6aa743..dda340eb3 100644 --- a/hed/models/definition_dict.py +++ b/hed/models/definition_dict.py @@ -50,7 +50,7 @@ def add_definitions(self, def_dicts, hed_schema=None): for definition in def_dict: self.check_for_definitions(HedString(definition, hed_schema)) else: - raise TypeError("Invalid type '{type(def_dict)}' passed to DefinitionDict") + raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict") def _add_definition(self, def_tag, def_value): if def_tag in self.defs: diff --git a/hed/models/definition_entry.py b/hed/models/definition_entry.py index 190d8d3d3..23845709a 100644 --- a/hed/models/definition_entry.py +++ b/hed/models/definition_entry.py @@ -18,7 +18,8 @@ def __init__(self, name, contents, takes_value, source_context): """ self.name = name if contents: - contents = contents.copy().sort() + contents = contents.copy() + contents.sort() self.contents = contents self.takes_value = takes_value self.source_context = source_context diff --git a/hed/models/hed_group.py b/hed/models/hed_group.py index eeacd16db..ba3fc287c 100644 --- a/hed/models/hed_group.py +++ b/hed/models/hed_group.py @@ -126,13 +126,8 @@ def copy(self): return return_copy def sort(self): - """ Sort the tags and groups in this HedString in a consistent order. - - Returns: - self - """ + """ Sort the tags and groups in this HedString in a consistent order.""" self.sorted(update_self=True) - return self def sorted(self, update_self=False): """ Returns a sorted copy of this hed group as a list of it's children diff --git a/hed/models/hed_string.py b/hed/models/hed_string.py index 173b67860..84af5e17b 100644 --- a/hed/models/hed_string.py +++ b/hed/models/hed_string.py @@ -1,6 +1,7 @@ """ This module is used to split tags in a HED string. """ +import copy from hed.models.hed_group import HedGroup from hed.models.hed_tag import HedTag from hed.models.model_constants import DefTagNames @@ -12,14 +13,15 @@ class HedString(HedGroup): OPENING_GROUP_CHARACTER = '(' CLOSING_GROUP_CHARACTER = ')' - def __init__(self, hed_string, hed_schema=None, def_dict=None, _contents=None): + def __init__(self, hed_string, hed_schema, def_dict=None, _contents=None): """ Constructor for the HedString class. Parameters: hed_string (str): A HED string consisting of tags and tag groups. - hed_schema (HedSchema or None): The schema to use to identify tags. Can be passed later. + hed_schema (HedSchema): The schema to use to identify tags. + def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags. _contents ([HedGroup and/or HedTag] or None): Create a HedString from this exact list of children. - Does not make a copy. + Does not make a copy. Notes: - The HedString object parses its component tags and groups into a tree-like structure. @@ -33,27 +35,36 @@ def __init__(self, hed_string, hed_schema=None, def_dict=None, _contents=None): except ValueError: contents = [] super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string)) + self._schema = hed_schema + self._from_strings = None @classmethod - def from_hed_strings(cls, contents): + def from_hed_strings(cls, hed_strings): """ Factory for creating HedStrings via combination. Parameters: - contents (list or None): A list of HedString objects to combine. This takes ownership of their children. + hed_strings (list or None): A list of HedString objects to combine. + This takes ownership of their children. + Returns: + new_string(HedString): The newly combined HedString """ - result = HedString.__new__(HedString) - hed_string = "".join([group._hed_string for group in contents]) - contents = [child for sub_string in contents for child in sub_string.children] - result.__init__(hed_string=hed_string, _contents=contents) - return result + if not hed_strings: + raise TypeError("Passed an empty list to from_hed_strings") + new_string = HedString.__new__(HedString) + hed_string = ",".join([group._hed_string for group in hed_strings]) + contents = [child for sub_string in hed_strings for child in sub_string.children] + first_schema = hed_strings[0]._schema + new_string.__init__(hed_string=hed_string, _contents=contents, hed_schema=first_schema) + new_string._from_strings = hed_strings + return new_string @property def is_group(self): """ Always False since the underlying string is not a group with parentheses. """ return False - def convert_to_canonical_forms(self, hed_schema): + def _calculate_to_canonical_forms(self, hed_schema): """ Identify all tags using the given schema. Parameters: @@ -65,10 +76,39 @@ def convert_to_canonical_forms(self, hed_schema): """ validation_issues = [] for tag in self.get_all_tags(): - validation_issues += tag.convert_to_canonical_forms(hed_schema) + validation_issues += tag._calculate_to_canonical_forms(hed_schema) return validation_issues + def __deepcopy__(self, memo): + # check if the object has already been copied + if id(self) in memo: + return memo[id(self)] + + # create a new instance of HedString class, and direct copy all parameters + new_string = self.__class__.__new__(self.__class__) + new_string.__dict__.update(self.__dict__) + + # add the new object to the memo dictionary + memo[id(self)] = new_string + + # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry) + new_string._original_children = copy.deepcopy(self._original_children, memo) + new_string._from_strings = copy.deepcopy(self._from_strings, memo) + new_string._children = copy.deepcopy(self._children, memo) + + return new_string + + def copy(self): + """ Return a deep copy of this string. + + Returns: + HedGroup: The copied group. + + """ + return_copy = copy.deepcopy(self) + return return_copy + def remove_definitions(self): """ Remove definition tags and groups from this string. @@ -118,45 +158,7 @@ def expand_defs(self): return self - def convert_to_short(self, hed_schema): - """ Compute canonical forms and return the short form. - - Parameters: - hed_schema (HedSchema or None): The schema to use to calculate forms. - - Returns: - tuple: - - str: The string with all tags converted to short form. - - list: A list of issues found during conversion. Each issue is a dictionary. - - Notes: - - No issues will be found if no schema is passed. - - """ - conversion_issues = self.convert_to_canonical_forms(hed_schema) - short_string = self.get_as_short() - return short_string, conversion_issues - - def convert_to_long(self, hed_schema): - """ Compute canonical forms and return the long form. - - Parameters: - hed_schema (HedSchema or None): The schema to use to calculate forms. - - Returns: - tuple: - - str: The string with all tags converted to long form. - - list: A list of issues found during conversion. Each issue is a dictionary. - - Notes: - - No issues will be found if no schema is passed. - - """ - conversion_issues = self.convert_to_canonical_forms(hed_schema) - short_string = self.get_as_long() - return short_string, conversion_issues - - def convert_to_original(self): + def get_as_original(self): """ Return the original form of this string. Returns: @@ -164,17 +166,16 @@ def convert_to_original(self): Notes: Potentially with some extraneous spaces removed on returned string. - """ return self.get_as_form("org_tag") @staticmethod - def split_into_groups(hed_string, hed_schema=None, def_dict=None): + def split_into_groups(hed_string, hed_schema, def_dict=None): """ Split the HED string into a parse tree. Parameters: hed_string (str): A hed string consisting of tags and tag groups to be processed. - hed_schema (HedSchema or None): HED schema to use to identify tags. + hed_schema (HedSchema): HED schema to use to identify tags. def_dict(DefinitionDict): The definitions to identify Returns: list: A list of HedTag and/or HedGroup. @@ -190,7 +191,7 @@ def split_into_groups(hed_string, hed_schema=None, def_dict=None): input_tags = HedString.split_hed_string(hed_string) for is_hed_tag, (startpos, endpos) in input_tags: if is_hed_tag: - new_tag = HedTag(hed_string, (startpos, endpos), hed_schema, def_dict) + new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict) current_tag_group[-1].append(new_tag) else: string_portion = hed_string[startpos:endpos] @@ -239,11 +240,30 @@ def _get_org_span(self, tag_or_group): - If the hed tag or group was not in the original string, returns (None, None). """ + if self._from_strings: + return self._get_org_span_from_strings(tag_or_group) + if self.check_if_in_original(tag_or_group): return tag_or_group.span return None, None + def _get_org_span_from_strings(self, tag_or_group): + """A different case of the above, to handle if this was created from hed string objects.""" + found_string = None + string_start_index = 0 + for string in self._from_strings: + if string.check_if_in_original(tag_or_group): + found_string = string + break + # Add 1 for comma + string_start_index += string.span[1] + 1 + + if not found_string: + return None, None + + return tag_or_group.span[0] + string_start_index, tag_or_group.span[1] + string_start_index + @staticmethod def split_hed_string(hed_string): """ Split a HED string into delimiters and tags. diff --git a/hed/models/hed_string_group.py b/hed/models/hed_string_group.py deleted file mode 100644 index 3171823ce..000000000 --- a/hed/models/hed_string_group.py +++ /dev/null @@ -1,125 +0,0 @@ -""" - This module is used to easily concatenate multiple hed strings in place -""" -from hed.models.hed_string import HedString - - -class HedStringGroup(HedString): - """ A container with hed string objects. - - Notes: - - Often this is used for assembling the hed strings from multiple columns. - - The HedStringGroup passes through many of the HedString operations. - - """ - - def __init__(self, hed_string_obj_list): - """ Constructor for the HedStringGroup class. - - Parameters: - hed_string_obj_list ([HedString]): A list of component HedStrings for this combined string. - - """ - super().__init__("") - self._children = list(hed_string for hed_string in hed_string_obj_list if hed_string is not None) - # Update the direct children to point to this combined string, rather than their original string - for child in self._children: - for sub_child in child.children: - sub_child._parent = self - - self._original_children = self._children - - def get_original_hed_string(self): - return ",".join([group._hed_string for group in self._children]) - - def sort(self): - combined_string = HedString.from_hed_strings(self._children) - combined_string.sorted(update_self=True) - return combined_string - - @property - def span(self): - """ Return the source span of this group from the source hed string. - - Return: - tuple: - - int: start index of the group (including parentheses) from the source string. - - int: end index of the group (including parentheses) from the source string. - - """ - return 0, len(self.get_original_hed_string()) - - @property - def children(self): - """ Return the direct children of this string. - - Returns: - list: a list of direct children of this group. - - """ - return [child for sub_string in self._children for child in sub_string._children] - - def remove(self, items_to_remove): - """ Remove tags/groups by identity. - - Parameters: - items_to_remove (list): A list of HedGroup and HedTag objects to remove. - - Notes: - - Any groups that become empty will also be pruned. - """ - all_groups = [group for sub_group in self._children for group in sub_group.get_all_groups()] - self._remove(items_to_remove, all_groups) - # Remove any lingering empty HedStrings - if any(not hed_string for hed_string in self._children): - if self._original_children is self._children: - self._original_children = self._children.copy() - self._children = [child for child in self._children if child] - - def replace(self, item_to_replace, new_contents): - """ Replace an existing tag or group. - - Parameters: - item_to_replace (HedTag or HedGroup): The tag to replace. - new_contents (HedTag or HedGroup or list): The replacements for the tag. - - :raises KeyError: - - item_to_replace does not exist - """ - replace_sub_string = None - for sub_string in self._children: - for i, child in enumerate(sub_string.children): - if item_to_replace is child: - replace_sub_string = sub_string - break - - replace_sub_string.replace(item_to_replace, new_contents) - - def _get_org_span(self, tag_or_group): - """ If this tag or group was in the original hed string, find it's original span. - - If the hed tag or group was not in the original string, returns (None, None) - - Parameters - ---------- - tag_or_group : HedTag or HedGroup - The hed tag to locate in this string. - - Returns - ------- - tag_span: (int or None, int or None) - The starting and ending index of the given tag in the original string - """ - found_string = None - string_start_index = 0 - for string in self._children: - if string.check_if_in_original(tag_or_group): - found_string = string - break - # Add 1 for comma - string_start_index += string.span[1] + 1 - - if not found_string: - return None, None - - return tag_or_group.span[0] + string_start_index, tag_or_group.span[1] + string_start_index diff --git a/hed/models/hed_tag.py b/hed/models/hed_tag.py index 180f7cefb..01cea0664 100644 --- a/hed/models/hed_tag.py +++ b/hed/models/hed_tag.py @@ -11,19 +11,15 @@ class HedTag: """ - def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None): + def __init__(self, hed_string, hed_schema, span=None, def_dict=None): """ Creates a HedTag. Parameters: hed_string (str): Source hed string for this tag. + hed_schema (HedSchema): A parameter for calculating canonical forms on creation. span (int, int): The start and end indexes of the tag in the hed_string. - hed_schema (HedSchema or None): A convenience parameter for calculating canonical forms on creation. - - :raises ValueError: - - You cannot pass a def_dict without also passing a schema. + def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags. """ - if def_dict and not hed_schema: - raise ValueError("Passing a def_dict without also passing a schema is invalid.") self._hed_string = hed_string if span is None: span = (0, len(hed_string)) @@ -43,15 +39,12 @@ def __init__(self, hed_string, span=None, hed_schema=None, def_dict=None): self._extension_value = "" self._parent = None - # Downsides: two new parameters - # Have to check for this value, slowing everything down potentially. self._expandable = None self._expanded = False - if hed_schema: - self.convert_to_canonical_forms(hed_schema) - if def_dict: - def_dict.construct_def_tag(self) + self._calculate_to_canonical_forms(hed_schema) + if def_dict: + def_dict.construct_def_tag(self) def copy(self): """ Return a deep copy of this tag. @@ -83,9 +76,6 @@ def short_tag(self): Returns: short_tag (str): The short form of the tag, including value or extension. - Note: - Only valid after calling convert_to_canonical_forms - """ if self._schema_entry: return f"{self._namespace}{self._schema_entry.short_tag_name}{self._extension_value}" @@ -98,10 +88,6 @@ def base_tag(self): Returns: base_tag (str): The long form of the tag, without value or extension. - - Notes: - - Only valid after calling convert_to_canonical_forms. - """ if self._schema_entry: return self._schema_entry.long_tag_name @@ -156,7 +142,6 @@ def org_base_tag(self): Notes: - Warning: This could be empty if the original tag had a name_prefix prepended. e.g. a column where "Label/" is prepended, thus the column value has zero base portion. - - Only valid after calling convert_to_canonical_forms. """ if self._schema_entry: extension_len = len(self._extension_value) @@ -209,7 +194,7 @@ def tag(self, new_tag_val): """ self._tag = new_tag_val self._schema_entry = None - self.convert_to_canonical_forms(self._schema) + self._calculate_to_canonical_forms(self._schema) @property def extension(self): @@ -323,7 +308,7 @@ def lower(self): """ Convenience function, equivalent to str(self).lower(). """ return str(self).lower() - def convert_to_canonical_forms(self, hed_schema): + def _calculate_to_canonical_forms(self, hed_schema): """ Update internal state based on schema. Parameters: @@ -661,21 +646,15 @@ def __deepcopy__(self, memo): return memo[id(self)] # create a new instance of HedTag class - new_tag = HedTag(self._hed_string, self.span) + new_tag = self.__class__.__new__(self.__class__) + new_tag.__dict__.update(self.__dict__) # add the new object to the memo dictionary memo[id(self)] = new_tag - # copy all other attributes except schema and schema_entry - new_tag._tag = copy.deepcopy(self._tag, memo) - new_tag._namespace = copy.deepcopy(self._namespace, memo) - new_tag._extension_value = copy.deepcopy(self._extension_value, memo) + # Deep copy the attributes that need it(most notably, we don't copy schema/schema entry) new_tag._parent = copy.deepcopy(self._parent, memo) new_tag._expandable = copy.deepcopy(self._expandable, memo) new_tag._expanded = copy.deepcopy(self._expanded, memo) - # reference the schema and schema_entry from the original object - new_tag._schema = self._schema - new_tag._schema_entry = self._schema_entry - return new_tag diff --git a/hed/models/indexed_df.py b/hed/models/indexed_df.py new file mode 100644 index 000000000..c23c4c7fd --- /dev/null +++ b/hed/models/indexed_df.py @@ -0,0 +1,39 @@ +from functools import partial +import pandas as pd + +from hed.models.sidecar import Sidecar +from hed.models.tabular_input import TabularInput +from hed.models.hed_string import HedString +from hed.models.definition_dict import DefinitionDict +from hed.models import df_util + + +class IndexedDF: + def __init__(self, tabular_input, sidecar, hed_schema): + self._hed_strings = df_util.get_assembled(tabular_input, sidecar, hed_schema, expand_defs=True) + # self._df = df + # self._index = self._create_index(df) + # self._hed_strings = df_util.get_assembled() + # + # def create_index_from_hed_strings(self): + # + # + # + # @staticmethod + # def find_rows_for_strings(self, df, search_strings): + # cache = {} + # for string in search_strings: + # if string not in cache: + # print("Hi") + # parts = string.split('/') + # for i in range(1, len(parts) + 1): + # part = '/'.join(parts[:i]) + # if part not in cache: + # if i == 1: + # searchable_rows = df + # else: + # searchable_rows = df[cache['/'.join(parts[:i - 1])]] + # cache[part] = searchable_rows[searchable_rows.str.contains(part)].index.to_list() + # # cache[string] = cache[part] # Assign the cache result to the complete string + # + # return cache diff --git a/hed/models/sidecar.py b/hed/models/sidecar.py index d3038fff6..d7d77a09b 100644 --- a/hed/models/sidecar.py +++ b/hed/models/sidecar.py @@ -49,8 +49,6 @@ def all_hed_columns(self): column_refs(list): A list of all valid hed columns by name """ possible_column_references = [column.column_name for column in self if column.column_type != ColumnType.Ignore] - if "HED" not in possible_column_references: - possible_column_references.append("HED") return possible_column_references @@ -74,7 +72,7 @@ def column_data(self): """ return {col_name: ColumnMetadata(name=col_name, source=self.loaded_dict) for col_name in self.loaded_dict} - def get_def_dict(self, hed_schema=None, extra_def_dicts=None): + def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: @@ -193,11 +191,11 @@ def _load_json_file(self, fp): except (json.decoder.JSONDecodeError, AttributeError) as e: raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), self.name) from e - def extract_definitions(self, hed_schema=None, error_handler=None): + def extract_definitions(self, hed_schema, error_handler=None): """ Gather and validate definitions in metadata. Parameters: - hed_schema (HedSchema or None): The schema to used to identify tags. + hed_schema (HedSchema): The schema to used to identify tags. error_handler (ErrorHandler or None): The error handler to use for context, uses a default one if None. Returns: diff --git a/hed/models/tabular_input.py b/hed/models/tabular_input.py index 1b9570105..cd3172126 100644 --- a/hed/models/tabular_input.py +++ b/hed/models/tabular_input.py @@ -54,7 +54,7 @@ def reset_column_mapper(self, sidecar=None): self.reset_mapper(new_mapper) - def get_def_dict(self, hed_schema=None, extra_def_dicts=None): + def get_def_dict(self, hed_schema, extra_def_dicts=None): """ Returns the definition dict for this sidecar. Parameters: diff --git a/hed/schema/hed_schema.py b/hed/schema/hed_schema.py index fc6978fa5..e5f6c5e62 100644 --- a/hed/schema/hed_schema.py +++ b/hed/schema/hed_schema.py @@ -1,19 +1,19 @@ import os import shutil +import json from hed.schema.hed_schema_constants import HedKey, HedSectionKey from hed.schema import hed_schema_constants as constants from hed.schema.schema_io import schema_util from hed.schema.schema_io.schema2xml import HedSchema2XML from hed.schema.schema_io.schema2wiki import HedSchema2Wiki - -from hed.schema import schema_validation_util from hed.schema.hed_schema_section import HedSchemaSection, HedSchemaTagSection, HedSchemaUnitClassSection from hed.errors import ErrorHandler from hed.errors.error_types import ValidationErrors +from hed.schema.hed_schema_base import HedSchemaBase -class HedSchema: +class HedSchema(HedSchemaBase): """ A HED schema suitable for processing. """ def __init__(self): @@ -21,13 +21,13 @@ def __init__(self): A HedSchema can be used for validation, checking tag attributes, parsing tags, etc. """ + super().__init__() self._has_duplicate_tags = False self.header_attributes = {} self.filename = None self.prologue = "" self.epilogue = "" - self._is_hed3_schema = None # This is the specified library name_prefix - tags will be {schema_namespace}:{tag_name} self._namespace = "" @@ -37,7 +37,7 @@ def __init__(self): # Basic schema properties # =============================================== @property - def version(self): + def version_number(self): """ The HED version of this schema. Returns: @@ -46,17 +46,13 @@ def version(self): """ return self.header_attributes['version'] - def get_formatted_version(self, as_string=False): - """ The HED version string including namespace and library name if any of this schema. - - Returns: - str: The complete version of this schema including library name and namespace. - - """ + @property + def version(self): + """The complete schema version, including prefix and library name(if applicable)""" library = self.library if library: library = library + '_' - return self._namespace + library + self.version + return self._namespace + library + self.version_number @property def library(self): @@ -89,6 +85,76 @@ def merged(self): """ return not self.header_attributes.get(constants.UNMERGED_ATTRIBUTE, "") + @property + def all_tags(self): + """ Return the tag schema section. + + Returns: + HedSchemaTagSection: The tag section. + """ + return self._sections[HedSectionKey.AllTags] + + @property + def unit_classes(self): + """ Return the unit classes schema section. + + Returns: + HedSchemaUnitClassSection: The unit classes section. + """ + return self._sections[HedSectionKey.UnitClasses] + + @property + def unit_modifiers(self): + """ Return the modifiers classes schema section + + Returns: + HedSchemaSection: The unit modifiers section. + """ + return self._sections[HedSectionKey.UnitModifiers] + + @property + def value_classes(self): + """ Return the value classes schema section. + + Returns: + HedSchemaSection: The value classes section. + """ + return self._sections[HedSectionKey.ValueClasses] + + @property + def attributes(self): + """ Return the attributes schema section. + + Returns: + HedSchemaSection: The attributes section. + """ + return self._sections[HedSectionKey.Attributes] + + @property + def properties(self): + """ Return the properties schema section. + + Returns: + HedSchemaSection: The properties section. + """ + return self._sections[HedSectionKey.Properties] + + def get_schema_versions(self): + """ A list of HED version strings including namespace and library name if any of this schema. + + Returns: + list: The complete version of this schema including library name and namespace. + """ + return [self.get_formatted_version()] + + def get_formatted_version(self): + """ The HED version string including namespace and library name if any of this schema. + + Returns: + str: A json formatted string of the complete version of this schema including library name and namespace. + """ + return json.dumps(self.version) + def get_save_header_attributes(self, save_merged=False): """ returns the attributes that should be saved. @@ -114,10 +180,6 @@ def schema_for_namespace(self, namespace): Returns: HedSchema: The HED schema object for this schema. - - Notes: - -This is mostly a placeholder for HedSchemaGroup and may be refactored out later. - """ if self._namespace != namespace: return None @@ -226,128 +288,6 @@ def set_schema_prefix(self, schema_namespace): self._namespace = schema_namespace - def check_compliance(self, check_for_warnings=True, name=None, error_handler=None): - """ Check for HED3 compliance of this schema. - - Parameters: - check_for_warnings (bool): If True, also checks for formatting issues - name (str): If present, use this as the filename for context - error_handler (ErrorHandler or None): Used to report errors. - - Returns: - list: A list of all warnings and errors found in the file. Each issue is a dictionary. - - Notes: - - Formatting issues include invalid characters and capitalization. - - The name parameter is useful when handling temporary files. - - A default error handler is created if none passed in. - - """ - from hed.schema import schema_compliance - return schema_compliance.check_compliance(self, check_for_warnings, name, error_handler) - - def find_duplicate_tags(self): - """ Find all tags that are not unique. - - Returns: - dict: A dictionary of all duplicate short tags - - Notes: - - The returned dictionary has the short-form tags as keys and lists - of long tags sharing the short form as the values. - - """ - return self.all_tags.duplicate_names - - def __getitem__(self, section_key): - return self._sections[section_key] - - @property - def has_duplicate_tags(self): - """ Return True if this is a valid hed3. - - Returns: - bool: True if this is a valid hed3 schema with no duplicate short tags. - - """ - return self._has_duplicate_tags - - @property - def all_tags(self): - """ Return the tag schema section. - - Returns: - HedSchemaSection: The tag section. - - """ - return self._sections[HedSectionKey.AllTags] - - @property - def unit_classes(self): - """ Return the unit classes schema section. - - Returns: - HedSchemaSection: The unit classes section. - - """ - return self._sections[HedSectionKey.UnitClasses] - - @property - def unit_modifiers(self): - """ Return the modifiers classes schema section - - Returns: - HedSchemaSection: The unit modifiers section. - - """ - return self._sections[HedSectionKey.UnitModifiers] - - @property - def value_classes(self): - """ Return the value classes schema section. - - Returns: - HedSchemaSection: The value classes section. - - """ - return self._sections[HedSectionKey.ValueClasses] - - @property - def attributes(self): - """ Return the attributes schema section. - - Returns: - HedSchemaSection: The attributes section. - - """ - return self._sections[HedSectionKey.Attributes] - - @property - def properties(self): - """ Return the properties schema section. - - Returns: - HedSchemaSection: The properties section. - - """ - return self._sections[HedSectionKey.Properties] - - @property - def is_hed3_schema(self): - """ Return true if this is at least version HED3. - - Returns: - bool: True if this is a hed3 schema. - - Notes: - - This is considered true if the version number is >= 8.0 or it has a library name. - - """ - if self._is_hed3_schema is not None: - return self._is_hed3_schema - - return self.library or schema_validation_util.is_hed3_version_number(self.version) - def __eq__(self, other): """ Return True if these schema match exactly. @@ -395,40 +335,39 @@ def __eq__(self, other): return False return True - def get_unit_class_units(self, unit_class_type): - """ Get the list of unit class units this type will accept. + def __getitem__(self, section_key): + return self._sections[section_key] + + def check_compliance(self, check_for_warnings=True, name=None, error_handler=None): + """ Check for HED3 compliance of this schema. Parameters: - unit_class_type (str): The unit class type to check for. e.g. "time". + check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization. + name (str): If present, use as the filename for context, rather than using the actual filename. + Useful for temp filenames when supporting web services. + error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. Returns: - list: A list of each UnitEntry this type allows. - - Examples: - Eg 'time' returns ['second', 's', 'day', 'minute', 'hour'] - + list: A list of all warnings and errors found in the file. Each issue is a dictionary. """ - unit_class_entry = self.get_tag_entry(unit_class_type, HedSectionKey.UnitClasses) - if unit_class_entry: - return unit_class_entry.units - return [] + from hed.schema import schema_compliance + return schema_compliance.check_compliance(self, check_for_warnings, name, error_handler) - def get_tags_with_attribute(self, key, section_key=HedSectionKey.AllTags): + def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.AllTags): """ Return tag entries with the given attribute. Parameters: - key (str): A tag attribute. Eg HedKey.ExtensionAllowed - section_key (HedSectionKey): The HedSectionKey for the section to retrieve from. + attribute (str): A tag attribute. Eg HedKey.ExtensionAllowed + key_class (HedSectionKey): The HedSectionKey for the section to retrieve from. Returns: list: A list of all tags with this attribute. Notes: - The result is cached so will be fast after first call. - """ - return self._sections[section_key].get_entries_with_attribute(key, return_name_only=True, - schema_namespace=self._namespace) + return self._sections[key_class].get_entries_with_attribute(attribute, return_name_only=True, + schema_namespace=self._namespace) def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace=""): """ Return the schema entry for this tag, if one exists. @@ -442,7 +381,6 @@ def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace= Returns: HedSchemaEntry: The schema entry for the given tag. - """ if key_class == HedSectionKey.AllTags: if schema_namespace != self._namespace: @@ -452,28 +390,12 @@ def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace= return self._get_tag_entry(name, key_class) - def _get_tag_entry(self, name, key_class=HedSectionKey.AllTags): - """ Return the schema entry for this tag, if one exists. - - Parameters: - name (str): Any form of basic tag(or other section entry) to look up. - This will not handle extensions or similar. - key_class (HedSectionKey or str): The type of entry to return. - - Returns: - HedSchemaEntry: The schema entry for the given tag. - - """ - return self._sections[key_class].get(name) - def find_tag_entry(self, tag, schema_namespace=""): """ Find the schema entry for a given source tag. - Note: Will not identify tags if schema_namespace is set incorrectly - Parameters: - tag (str, HedTag): Any form of tag to look up. Can have an extension, value, etc. - schema_namespace (str): The schema namespace of the tag, if any. + tag (str, HedTag): Any form of tag to look up. Can have an extension, value, etc. + schema_namespace (str): The schema namespace of the tag, if any. Returns: HedTagEntry: The located tag entry for this tag. @@ -482,7 +404,6 @@ def find_tag_entry(self, tag, schema_namespace=""): Notes: Works left to right (which is mostly relevant for errors). - """ if schema_namespace != self._namespace: validation_issues = ErrorHandler.format_error(ValidationErrors.HED_LIBRARY_UNMATCHED, tag, @@ -490,6 +411,23 @@ def find_tag_entry(self, tag, schema_namespace=""): return None, None, validation_issues return self._find_tag_entry(tag, schema_namespace) + # =============================================== + # Private utility functions for getting/finding tags + # =============================================== + def _get_tag_entry(self, name, key_class=HedSectionKey.AllTags): + """ Return the schema entry for this tag, if one exists. + + Parameters: + name (str): Any form of basic tag(or other section entry) to look up. + This will not handle extensions or similar. + key_class (HedSectionKey or str): The type of entry to return. + + Returns: + HedSchemaEntry: The schema entry for the given tag. + + """ + return self._sections[key_class].get(name) + def _find_tag_entry(self, tag, schema_namespace=""): """ Find the schema entry for a given source tag. @@ -594,7 +532,6 @@ def _validate_remaining_terms(self, tag, working_tag, prefix_tag_adj, current_sl # =============================================== def finalize_dictionaries(self): """ Call to finish loading. """ - self._is_hed3_schema = self.is_hed3_schema self._has_duplicate_tags = bool(self.all_tags.duplicate_names) self._update_all_entries() @@ -736,7 +673,7 @@ def _create_empty_sections(): return dictionaries - def get_modifiers_for_unit(self, unit): + def _get_modifiers_for_unit(self, unit): """ Return the valid modifiers for the given unit Parameters: diff --git a/hed/schema/hed_schema_base.py b/hed/schema/hed_schema_base.py new file mode 100644 index 000000000..b0e29ebcc --- /dev/null +++ b/hed/schema/hed_schema_base.py @@ -0,0 +1,125 @@ +""" + Abstract base class for HedSchema and HedSchemaGroup, showing the common functionality +""" + +from hed.errors import ErrorHandler +from hed.schema.hed_schema_constants import HedSectionKey +from abc import ABC, abstractmethod + + +class HedSchemaBase(ABC): + """ Baseclass for schema and schema group. + Overriding the following functions will allow you to use the schema for validation etc. + """ + def __init__(self): + pass + + @abstractmethod + def get_schema_versions(self): + """ A list of HED version strings including namespace and library name if any of this schema. + + Returns: + list: The complete version of this schema including library name and namespace. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def get_formatted_version(self): + """ The HED version string including namespace and library name if any of this schema. + + Returns: + str: The complete version of this schema including library name and namespace. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def schema_for_namespace(self, namespace): + """ Return the HedSchema for the library namespace. + + Parameters: + namespace (str): A schema library name namespace. + + Returns: + HedSchema or None: The specific schema for this library name namespace if exists. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @property + @abstractmethod + def valid_prefixes(self): + """ Return a list of all prefixes this group will accept. + + Returns: + prefixes(list of str): A list of strings representing valid prefixes for this group. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.AllTags): + """ Return tag entries with the given attribute. + + Parameters: + attribute (str): A tag attribute. Eg HedKey.ExtensionAllowed + key_class (HedSectionKey): The HedSectionKey for the section to retrieve from. + + Returns: + list: A list of all tags with this attribute. + + Notes: + - The result is cached so will be fast after first call. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + # todo: maybe tweak this API so you don't have to pass in library namespace? + @abstractmethod + def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace=""): + """ Return the schema entry for this tag, if one exists. + + Parameters: + name (str): Any form of basic tag(or other section entry) to look up. + This will not handle extensions or similar. + If this is a tag, it can have a schema namespace, but it's not required + key_class (HedSectionKey or str): The type of entry to return. + schema_namespace (str): Only used on AllTags. If incorrect, will return None. + + Returns: + HedSchemaEntry: The schema entry for the given tag. + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def find_tag_entry(self, tag, schema_namespace=""): + """ Find the schema entry for a given source tag. + + Parameters: + tag (str, HedTag): Any form of tag to look up. Can have an extension, value, etc. + schema_namespace (str): The schema namespace of the tag, if any. + + Returns: + HedTagEntry: The located tag entry for this tag. + str: The remainder of the tag that isn't part of the base tag. + list: A list of errors while converting. + + Notes: + Works left to right (which is mostly relevant for errors). + """ + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def __eq__(self, other): + raise NotImplemented("This function must be implemented in the baseclass") + + @abstractmethod + def check_compliance(self, check_for_warnings=True, name=None, error_handler=None): + """ Check for HED3 compliance of this schema. + + Parameters: + check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization. + name (str): If present, use as the filename for context, rather than using the actual filename. + Useful for temp filenames when supporting web services. + error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. + + Returns: + list: A list of all warnings and errors found in the file. Each issue is a dictionary. + """ + raise NotImplemented("This function must be implemented in the baseclass") diff --git a/hed/schema/hed_schema_entry.py b/hed/schema/hed_schema_entry.py index 2c335b882..18898ad34 100644 --- a/hed/schema/hed_schema_entry.py +++ b/hed/schema/hed_schema_entry.py @@ -176,6 +176,7 @@ def __eq__(self, other): return False return True + class UnitEntry(HedSchemaEntry): """ A single unit entry with modifiers in the HedSchema. """ @@ -191,7 +192,8 @@ def finalize_entry(self, schema): schema (HedSchema): The schema rules come from. """ - self.unit_modifiers = schema.get_modifiers_for_unit(self.name) + self.unit_modifiers = schema._get_modifiers_for_unit(self.name) + class HedTagEntry(HedSchemaEntry): """ A single tag entry in the HedSchema. """ diff --git a/hed/schema/hed_schema_group.py b/hed/schema/hed_schema_group.py index 00bc2f78b..96187b73f 100644 --- a/hed/schema/hed_schema_group.py +++ b/hed/schema/hed_schema_group.py @@ -7,9 +7,10 @@ from hed.errors.exceptions import HedExceptions, HedFileError from hed.errors import ErrorHandler, ValidationErrors from hed.schema.hed_schema_constants import HedSectionKey +from hed.schema.hed_schema_base import HedSchemaBase -class HedSchemaGroup: +class HedSchemaGroup(HedSchemaBase): """ Container for multiple HedSchema objects. Notes: @@ -30,6 +31,7 @@ def __init__(self, schema_list): - Multiple schemas have the same library prefixes. - Empty list passed """ + super().__init__() if len(schema_list) == 0: raise HedFileError(HedExceptions.BAD_PARAMETERS, "Empty list passed to HedSchemaGroup constructor.", filename="Combined Schema") @@ -40,49 +42,21 @@ def __init__(self, schema_list): filename="Combined Schema") self._schemas = {hed_schema._namespace: hed_schema for hed_schema in schema_list} - # =============================================== - # General schema properties/functions - # =============================================== - - def get_formatted_version(self, as_string=True): - x = [schema.get_formatted_version() for schema in self._schemas.values()] - if as_string: - return json.dumps(x) - return x - - @property - def has_duplicate_tags(self): - """ Return True if valid hed3 schema with no duplicate short tags. + def get_schema_versions(self): + """ A list of HED version strings including namespace and library name if any of this schema. Returns: - bool: True if this is a valid hed3 schema with no duplicate short tags. - + list: The complete version of this schema including library name and namespace. """ - return any([schema.has_duplicate_tags for schema in self._schemas.values()]) - - @property - def unit_classes(self): - """ A list of all unit classes represented in this group. """ - return all([schema.unit_classes for schema in self._schemas.values()]) + return [schema.version for schema in self._schemas.values()] - @property - def is_hed3_compatible(self): - """ A list of HED3-compliant schemas in this group. """ - return all([schema.is_hed3_compatible for schema in self._schemas.values()]) - - @property - def is_hed3_schema(self): - """ HedSchemaGroup objects are always HED3.""" - return True + def get_formatted_version(self): + """ The HED version string including namespace and library name if any of this schema. - @property - def unit_modifiers(self): - """ Return a list of all unit modifiers for all schema. """ - return all([schema.unit_modifiers for schema in self._schemas.values()]) - - @property - def value_classes(self): - return all([schema.value_classes for schema in self._schemas.values()]) + Returns: + str: The complete version of this schema including library name and namespace. + """ + return json.dumps(self.get_schema_versions()) def __eq__(self, other): return self._schemas == other._schemas @@ -111,52 +85,52 @@ def valid_prefixes(self): return list(self._schemas.keys()) def check_compliance(self, check_for_warnings=True, name=None, error_handler=None): - """ Check for hed3 compliance of this schema. + """ Check for HED3 compliance of this schema. Parameters: check_for_warnings (bool): If True, checks for formatting issues like invalid characters, capitalization. name (str): If present, use as the filename for context, rather than using the actual filename. + Useful for temp filenames when supporting web services. error_handler (ErrorHandler or None): Used to report errors. Uses a default one if none passed in. Returns: list: A list of all warnings and errors found in the file. Each issue is a dictionary. - - Notes: - - Useful for temp filenames when supporting web services. - """ issues_list = [] for schema in self._schemas.values(): issues_list += schema.check_compliance(check_for_warnings, name, error_handler) return issues_list - def get_tags_with_attribute(self, key): - """ Return the tags with this attribute. + def get_tags_with_attribute(self, attribute, key_class=HedSectionKey.AllTags): + """ Return tag entries with the given attribute. Parameters: - key (str): The attributes. + attribute (str): A tag attribute. Eg HedKey.ExtensionAllowed + key_class (HedSectionKey): The HedSectionKey for the section to retrieve from. + + Returns: + list: A list of all tags with this attribute. + Notes: + - The result is cached so will be fast after first call. """ all_tags = set() for schema in self._schemas.values(): - all_tags.update(schema.get_tags_with_attribute(key)) - return all_tags + all_tags.update(schema.get_tags_with_attribute(attribute, key_class)) + return list(all_tags) - # todo: maybe tweak this API so you don't have to pass in library namespace? def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace=""): """ Return the schema entry for this tag, if one exists. Parameters: name (str): Any form of basic tag(or other section entry) to look up. - key_class (HedSectionKey): The tag section to search. - schema_namespace (str or None): An optional namespace associated with this tag. + This will not handle extensions or similar. + If this is a tag, it can have a schema namespace, but it's not required + key_class (HedSectionKey or str): The type of entry to return. + schema_namespace (str): Only used on AllTags. If incorrect, will return None. Returns: - HedSchemaEntry: The schema entry for the given tag. - - Notes: - - This will not handle extensions or similar. - + HedSchemaEntry: The schema entry for the given tag. """ specific_schema = self.schema_for_namespace(schema_namespace) if not specific_schema: @@ -165,21 +139,19 @@ def get_tag_entry(self, name, key_class=HedSectionKey.AllTags, schema_namespace= return specific_schema.get_tag_entry(name, key_class, schema_namespace) def find_tag_entry(self, tag, schema_namespace=""): - """ Find a schema entry for a source tag. + """ Find the schema entry for a given source tag. Parameters: - tag (str or HedTag): Any form of tag to look up. Can have an extension, value, etc. - schema_namespace (str): The namespace the library, if any. + tag (str, HedTag): Any form of tag to look up. Can have an extension, value, etc. + schema_namespace (str): The schema namespace of the tag, if any. Returns: - tuple: - - HedTagEntry: The located tag entry for this tag. - - str: The remainder of the tag that isn't part of the base tag. - - list: A list of errors while converting. + HedTagEntry: The located tag entry for this tag. + str: The remainder of the tag that isn't part of the base tag. + list: A list of errors while converting. Notes: - - Works right to left.(mostly relevant for errors). - + Works left to right (which is mostly relevant for errors). """ specific_schema = self.schema_for_namespace(schema_namespace) if not specific_schema: diff --git a/hed/schema/hed_schema_io.py b/hed/schema/hed_schema_io.py index fdfdf9775..df5601305 100644 --- a/hed/schema/hed_schema_io.py +++ b/hed/schema/hed_schema_io.py @@ -1,5 +1,6 @@ """ Utilities for loading and outputting HED schema. """ import os +import json from hed.schema.schema_io.xml2schema import HedSchemaXMLParser from hed.schema.schema_io.wiki2schema import HedSchemaWikiParser from hed.schema import hed_schema_constants, hed_cache @@ -166,6 +167,8 @@ def load_schema_version(xml_version=None, xml_folder=None): Parameters: xml_version (str or list or None): List or str specifying which official HED schemas to use. An empty string returns the latest version + A json str format is also supported, + based on the output of HedSchema.get_formatted_version xml_folder (str): Path to a folder containing schema. Returns: @@ -175,6 +178,13 @@ def load_schema_version(xml_version=None, xml_folder=None): - The xml_version is not valid. - A fatal error was encountered in parsing """ + if xml_version and isinstance(xml_version, str) and \ + ((xml_version.startswith("[") and xml_version.endswith("]")) or + (xml_version.startswith('"') and xml_version.endswith('"'))): + try: + xml_version = json.loads(xml_version) + except json.decoder.JSONDecodeError as e: + raise HedFileError(HedExceptions.CANNOT_PARSE_JSON, str(e), xml_version) from e if xml_version and isinstance(xml_version, list): schemas = [_load_schema_version(xml_version=version, xml_folder=xml_folder) for version in xml_version] if len(schemas) == 1: diff --git a/hed/schema/schema_validation_util.py b/hed/schema/schema_validation_util.py index e08e194a0..aaf7cccea 100644 --- a/hed/schema/schema_validation_util.py +++ b/hed/schema/schema_validation_util.py @@ -43,24 +43,6 @@ def validate_version_string(version_string): return False -def is_hed3_version_number(version_string): - """ Check validity of the version. - - Parameters: - version_string (str): A version string. - - Returns: - bool: If True the version corresponds to a HED3 schema. - - """ - try: - version = Version(version_string) - if version.major >= 8: - return True - except ValueError: - return False - return False - header_attribute_validators = { constants.VERSION_ATTRIBUTE: (validate_version_string, HedExceptions.HED_SCHEMA_VERSION_INVALID), diff --git a/hed/tools/analysis/__init__.py b/hed/tools/analysis/__init__.py index 7ff23700e..d0a02bbe5 100644 --- a/hed/tools/analysis/__init__.py +++ b/hed/tools/analysis/__init__.py @@ -1 +1,10 @@ -""" Basic analysis tools. """ \ No newline at end of file +""" Basic analysis tools. """ +from .file_dictionary import FileDictionary +from .hed_context_manager import OnsetGroup, HedContextManager +from .hed_type_definitions import HedTypeDefinitions +from .hed_type_factors import HedTypeFactors +from .hed_type_values import HedTypeValues +from .hed_type_manager import HedTypeManager +from .hed_type_counts import HedTypeCount +from .key_map import KeyMap +from .tabular_summary import TabularSummary \ No newline at end of file diff --git a/hed/tools/analysis/analysis_util.py b/hed/tools/analysis/analysis_util.py index 343ff80cc..37f2b9b9d 100644 --- a/hed/tools/analysis/analysis_util.py +++ b/hed/tools/analysis/analysis_util.py @@ -116,7 +116,7 @@ def search_strings(hed_strings, queries, query_names=None): # expand_defs (bool): If True, definitions are expanded when the events are assembled. # # Returns: -# list: A list of HedString or HedStringGroup objects. +# list: A list of HedString objects. # # """ # hed_list = list(table.iter_dataframe(hed_ops=[hed_schema], return_string_only=True, diff --git a/hed/tools/analysis/event_manager.py b/hed/tools/analysis/event_manager.py index 58770a1e2..6062f85e8 100644 --- a/hed/tools/analysis/event_manager.py +++ b/hed/tools/analysis/event_manager.py @@ -36,7 +36,7 @@ def iter_context(self): Yields: int: position in the dataFrame - HedStringGroup: Context + HedString: Context """ diff --git a/hed/tools/analysis/hed_context_manager.py b/hed/tools/analysis/hed_context_manager.py index f3a5b8758..6cf26ed94 100644 --- a/hed/tools/analysis/hed_context_manager.py +++ b/hed/tools/analysis/hed_context_manager.py @@ -60,7 +60,7 @@ def iter_context(self): Yields: HedString: The HedString. - HedStringGroup: Context + HedString: Context """ diff --git a/hed/tools/analysis/hed_type_definitions.py b/hed/tools/analysis/hed_type_definitions.py index 1cd80c914..fdc87b454 100644 --- a/hed/tools/analysis/hed_type_definitions.py +++ b/hed/tools/analysis/hed_type_definitions.py @@ -87,7 +87,6 @@ def _extract_entry_values(self, entry): description = '' other_tags = [] for hed_tag in tag_list: - hed_tag.convert_to_canonical_forms(self.hed_schema) if hed_tag.short_base_tag.lower() == 'description': description = hed_tag.extension elif hed_tag.short_base_tag.lower() != self.type_tag: diff --git a/hed/tools/bids/__init__.py b/hed/tools/bids/__init__.py index 0f9e49e10..fae3491a8 100644 --- a/hed/tools/bids/__init__.py +++ b/hed/tools/bids/__init__.py @@ -1 +1,9 @@ -""" Models for BIDS datasets and files.""" \ No newline at end of file +""" Models for BIDS datasets and files.""" + +from .bids_dataset import BidsDataset +from .bids_file import BidsFile +from .bids_file_dictionary import BidsFileDictionary +from .bids_file_group import BidsFileGroup +from .bids_sidecar_file import BidsSidecarFile +from .bids_tabular_dictionary import BidsTabularDictionary +from .bids_tabular_file import BidsTabularFile diff --git a/hed/tools/bids/bids_dataset.py b/hed/tools/bids/bids_dataset.py index bbb06ae7b..fc4aa89f4 100644 --- a/hed/tools/bids/bids_dataset.py +++ b/hed/tools/bids/bids_dataset.py @@ -92,24 +92,6 @@ def validate(self, types=None, check_for_warnings=True): def get_summary(self): """ Return an abbreviated summary of the dataset. """ summary = {"dataset": self.dataset_description['Name'], - "hed_schema_versions": self.get_schema_versions(), + "hed_schema_versions": self.schema.get_schema_versions(), "file_group_types": f"{str(list(self.tabular_files.keys()))}"} return summary - - def get_schema_versions(self): - """ Return the schema versions used in this dataset. - - Returns: - list: List of schema versions used in this dataset. - - """ - if isinstance(self.schema, HedSchema): - return [self.schema.version] - version_list = [] - for prefix, schema in self.schema._schemas.items(): - name = schema.version - if schema.library: - name = schema.library + '_' + name - name = prefix + name - version_list.append(name) - return version_list diff --git a/hed/tools/remodeling/cli/run_remodel.py b/hed/tools/remodeling/cli/run_remodel.py index 47e8333d6..9f8a6bde3 100644 --- a/hed/tools/remodeling/cli/run_remodel.py +++ b/hed/tools/remodeling/cli/run_remodel.py @@ -99,7 +99,7 @@ def run_bids_ops(dispatch, args): bids = BidsDataset(dispatch.data_root, tabular_types=['events'], exclude_dirs=args.exclude_dirs) dispatch.hed_schema = bids.schema if args.verbose: - print(f"Successfully parsed BIDS dataset with HED schema {str(bids.get_schema_versions())}") + print(f"Successfully parsed BIDS dataset with HED schema {str(bids.schema.get_schema_versions())}") events = bids.get_tabular_group(args.file_suffix) if args.verbose: print(f"Processing {dispatch.data_root}") diff --git a/hed/validator/def_validator.py b/hed/validator/def_validator.py index 8036c6e13..0eb159976 100644 --- a/hed/validator/def_validator.py +++ b/hed/validator/def_validator.py @@ -14,7 +14,7 @@ def __init__(self, def_dicts=None, hed_schema=None): Parameters: def_dicts (list or DefinitionDict or str): DefinitionDicts containing the definitions to pass to baseclass - + hed_schema(HedSchema or None): Required if passing strings or lists of strings, unused otherwise. """ super().__init__(def_dicts, hed_schema=hed_schema) diff --git a/hed/validator/hed_validator.py b/hed/validator/hed_validator.py index ae2d791d9..6ce937454 100644 --- a/hed/validator/hed_validator.py +++ b/hed/validator/hed_validator.py @@ -18,7 +18,7 @@ class HedValidator: """ Top level validation of HED strings. """ - def __init__(self, hed_schema=None, def_dicts=None, run_full_onset_checks=True, definitions_allowed=False): + def __init__(self, hed_schema, def_dicts=None, run_full_onset_checks=True, definitions_allowed=False): """ Constructor for the HedValidator class. Parameters: @@ -66,7 +66,7 @@ def run_basic_checks(self, hed_string, allow_placeholders): return issues if hed_string == "n/a" or not self._hed_schema: return issues - issues += hed_string.convert_to_canonical_forms(self._hed_schema) + issues += hed_string._calculate_to_canonical_forms(self._hed_schema) if check_for_any_errors(issues): return issues # This is required so it can validate the tag a tag expands into diff --git a/hed/validator/sidecar_validator.py b/hed/validator/sidecar_validator.py index cd1500d30..9e6f222fd 100644 --- a/hed/validator/sidecar_validator.py +++ b/hed/validator/sidecar_validator.py @@ -111,6 +111,9 @@ def validate_structure(self, sidecar, error_handler): def _validate_refs(self, sidecar, error_handler): possible_column_refs = sidecar.all_hed_columns + if "HED" not in possible_column_refs: + possible_column_refs.append("HED") + issues = [] found_column_references = {} for column_data in sidecar: @@ -123,7 +126,7 @@ def _validate_refs(self, sidecar, error_handler): if len(hed_strings) > 1: error_handler.push_error_context(ErrorContext.SIDECAR_KEY_NAME, key_name) - error_handler.push_error_context(ErrorContext.HED_STRING, HedString(hed_string)) + error_handler.push_error_context(ErrorContext.HED_STRING, HedString(hed_string, hed_schema=self._schema)) invalid_locations = self._find_non_matching_braces(hed_string) for loc in invalid_locations: bad_symbol = hed_string[loc] diff --git a/hed/validator/spreadsheet_validator.py b/hed/validator/spreadsheet_validator.py index 66cf58f0f..025aa54d4 100644 --- a/hed/validator/spreadsheet_validator.py +++ b/hed/validator/spreadsheet_validator.py @@ -4,7 +4,6 @@ from hed.errors.error_types import ColumnErrors from hed.models import ColumnType from hed import HedString -from hed.models.hed_string_group import HedStringGroup from hed.errors.error_reporter import sort_issues, check_for_any_errors PANDAS_COLUMN_PREFIX_TO_IGNORE = "Unnamed: " @@ -66,7 +65,7 @@ def _run_checks(self, data, error_handler): error_handler.push_error_context(ErrorContext.COLUMN, columns[column_number]) - column_hed_string = HedString(cell) + column_hed_string = HedString(cell, self._schema) row_strings.append(column_hed_string) error_handler.push_error_context(ErrorContext.HED_STRING, column_hed_string) new_column_issues = self._hed_validator.run_basic_checks(column_hed_string, allow_placeholders=False) @@ -78,8 +77,8 @@ def _run_checks(self, data, error_handler): issues += new_column_issues if check_for_any_errors(new_column_issues): continue - else: - row_string = HedStringGroup(row_strings) + elif row_strings: + row_string = HedString.from_hed_strings(row_strings) error_handler.push_error_context(ErrorContext.HED_STRING, row_string) new_column_issues = self._hed_validator.run_full_string_checks(row_string) diff --git a/hed/validator/tag_validator.py b/hed/validator/tag_validator.py index 57ca84fe7..7939f72ea 100644 --- a/hed/validator/tag_validator.py +++ b/hed/validator/tag_validator.py @@ -26,7 +26,7 @@ class TagValidator: # Placeholder characters are checked elsewhere, but by default allowed TAG_ALLOWED_CHARS = "-_/" - def __init__(self, hed_schema=None): + def __init__(self, hed_schema): """Constructor for the Tag_Validator class. Parameters: @@ -258,8 +258,6 @@ def check_tag_invalid_chars(self, original_tag, allow_placeholders): """ validation_issues = self._check_invalid_prefix_issues(original_tag) allowed_chars = self.TAG_ALLOWED_CHARS - if not self._hed_schema or not self._hed_schema.is_hed3_schema: - allowed_chars += " " if allow_placeholders: allowed_chars += "#" validation_issues += self._check_invalid_chars(original_tag.org_base_tag, allowed_chars, original_tag) diff --git a/tests/errors/test_error_reporter.py b/tests/errors/test_error_reporter.py index 6cff88a25..bec45f60e 100644 --- a/tests/errors/test_error_reporter.py +++ b/tests/errors/test_error_reporter.py @@ -1,6 +1,8 @@ import unittest from hed.errors import ErrorHandler, ErrorContext, ErrorSeverity, ValidationErrors, SchemaWarnings, \ - get_printable_issue_string + get_printable_issue_string, sort_issues +from hed import HedString +from hed import load_schema_version class Test(unittest.TestCase): @@ -113,3 +115,30 @@ def test_printable_issue_string_with_filenames(self): self.assertEqual(printable_issues3.count(myfile), 1) self.error_handler.reset_error_context() + + def test_sort_issues(self): + schema = load_schema_version("8.1.0") + issues = [ + {ErrorContext.CUSTOM_TITLE: 'issue3', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 5, + ErrorContext.HED_STRING: HedString('Test C', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue1', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 10, + ErrorContext.HED_STRING: HedString('Test A', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue2', ErrorContext.FILE_NAME: 'File1', ErrorContext.ROW: 2}, + {ErrorContext.CUSTOM_TITLE: 'issue4', ErrorContext.FILE_NAME: 'File2', ErrorContext.ROW: 1, + ErrorContext.HED_STRING: HedString('Test D', schema)}, + {ErrorContext.CUSTOM_TITLE: 'issue5', ErrorContext.FILE_NAME: 'File3', ErrorContext.ROW: 15} + ] + + sorted_issues = sort_issues(issues) + self.assertEqual(sorted_issues[0][ErrorContext.CUSTOM_TITLE], 'issue1') + self.assertEqual(sorted_issues[1][ErrorContext.CUSTOM_TITLE], 'issue2') + self.assertEqual(sorted_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') + self.assertEqual(sorted_issues[3][ErrorContext.CUSTOM_TITLE], 'issue4') + self.assertEqual(sorted_issues[4][ErrorContext.CUSTOM_TITLE], 'issue5') + + reversed_issues = sort_issues(issues, reverse=True) + self.assertEqual(reversed_issues[0][ErrorContext.CUSTOM_TITLE], 'issue5') + self.assertEqual(reversed_issues[1][ErrorContext.CUSTOM_TITLE], 'issue4') + self.assertEqual(reversed_issues[2][ErrorContext.CUSTOM_TITLE], 'issue3') + self.assertEqual(reversed_issues[3][ErrorContext.CUSTOM_TITLE], 'issue2') + self.assertEqual(reversed_issues[4][ErrorContext.CUSTOM_TITLE], 'issue1') diff --git a/tests/models/test_column_mapper.py b/tests/models/test_column_mapper.py index 7f399f0dd..54064c515 100644 --- a/tests/models/test_column_mapper.py +++ b/tests/models/test_column_mapper.py @@ -4,19 +4,21 @@ from hed.models import ColumnMapper, ColumnType, HedString from hed.models.sidecar import Sidecar from hed.errors import ValidationErrors - +from hed import load_schema class Test(unittest.TestCase): - schema_file = '../data/schema_tests/HED8.0.0t.xml' - @classmethod def setUpClass(cls): + base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') + schema_file = 'schema_tests/HED8.0.0t.xml' + + cls.hed_schema = load_schema(os.path.join(base_data_dir, schema_file)) cls.integer_key_dictionary = {0: 'one', 1: 'two', 2: 'three'} cls.zero_based_row_column_count = 3 cls.column_prefix_dictionary = {2: 'Event/Description/', 3: 'Event/Label/', 4: 'Event/Category/'} cls.category_key = 'Event/Category/' cls.category_participant_and_stimulus_tags = \ - HedString('Event/Category/Participant response, Event/Category/Stimulus') + HedString('Event/Category/Participant response, Event/Category/Stimulus', cls.hed_schema) cls.row_with_hed_tags = ['event1', 'tag1', 'tag2'] diff --git a/tests/models/test_definition_dict.py b/tests/models/test_definition_dict.py index 61296e638..357584cc1 100644 --- a/tests/models/test_definition_dict.py +++ b/tests/models/test_definition_dict.py @@ -23,7 +23,7 @@ def check_def_base(self, test_strings, expected_issues): # print(test_key) # print(test_issues) # print(expected_issue) - self.assertCountEqual(test_issues, expected_issue, HedString(test_strings[test_key])) + self.assertCountEqual(test_issues, expected_issue, HedString(test_strings[test_key], self.hed_schema)) class TestDefinitionDict(TestDefBase): @@ -100,10 +100,10 @@ def test_definitions(self): 'invalidPlaceholder': self.format_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, tag=0, def_name="InvalidDef1/InvalidPlaceholder"), 'defInGroup': self.format_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, - tag=HedTag("Def/ImproperlyPlacedDef"), def_name="ValidDefName"), + tag=HedTag("Def/ImproperlyPlacedDef", self.hed_schema), def_name="ValidDefName"), 'defExpandInGroup': self.format_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, - tag=HedTag("Def-expand/ImproperlyPlacedDef"), - def_name="ValidDefName"), + tag=HedTag("Def-expand/ImproperlyPlacedDef", self.hed_schema), + def_name="ValidDefName"), 'doublePoundSignPlaceholder': self.format_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, tag=0, def_name="InvalidDef/##"), 'doublePoundSignDiffPlaceholder': self.format_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, diff --git a/tests/models/test_definition_entry.py b/tests/models/test_definition_entry.py index 8407342f1..07854ad8d 100644 --- a/tests/models/test_definition_entry.py +++ b/tests/models/test_definition_entry.py @@ -9,121 +9,7 @@ class Test(unittest.TestCase): @classmethod def setUpClass(cls): - cls.def1 = HedString('(Condition-variable/Blech, Red, Blue, Description/This is a description)') - hed_schema = load_schema_version('8.1.0') - cls.def2 = HedString('(Condition-variable/Blech, Red, Blue, Description/This is a description)', - hed_schema=hed_schema) - cls.def3 = HedString('(Condition-variable/Blech, Red, Label/#, Blue, Description/This is a description)') - cls.def4 = HedString('(Condition-variable/Blech, Red, Label/#, Blue, Description/This is a description)', - hed_schema=hed_schema) - cls.hed_schema = hed_schema - - # def test_constructor(self): - # def_entry1 = DefinitionEntry('Def1', self.def1, False, None) - # self.assertIsInstance(def_entry1, DefinitionEntry) - # self.assertIn('Condition-variable/Blech', def_entry1.tag_dict) - # def_entry2 = DefinitionEntry('Def2', self.def2, False, None) - # self.assertIsInstance(def_entry2, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry2.tag_dict) - # def_entry3 = DefinitionEntry('Def3', self.def3, False, None) - # self.assertIsInstance(def_entry3, DefinitionEntry) - # self.assertIn('Condition-variable/Blech', def_entry3.tag_dict) - # def_entry4 = DefinitionEntry('Def4', self.def4, False, None) - # self.assertIsInstance(def_entry4, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry4.tag_dict) - # def_entry3a = DefinitionEntry('Def3a', self.def3, True, None) - # self.assertIsInstance(def_entry3a, DefinitionEntry) - # self.assertIn('Condition-variable/Blech', def_entry3a.tag_dict) - # def_entry4a = DefinitionEntry('Def4a', self.def4, True, None) - # self.assertIsInstance(def_entry4a, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry4a.tag_dict) - - def test_get_definition(self): - def_entry1 = DefinitionEntry('Def1', self.def1, False, None) - str1 = HedString("Green, Def/Def1, Blue") - ret1, ret1 = def_entry1.get_definition(str1) - def_entry1a = DefinitionEntry('Def1', self.def3, False, None) - str1a = HedString("Green, Def/Def1, Blue", hed_schema=self.hed_schema) - ret3, ret3 = def_entry1a.get_definition(str1a) - def_entry1b = DefinitionEntry('Def1', self.def4, True, None) - str2b = HedString("Green, Def/Def1, Blue", hed_schema=self.hed_schema) - # self.assertIn('Condition-variable/Blech', def_entry1.tag_dict) - # def_entry2 = DefinitionEntry('Def2', self.def2, False, None) - # self.assertIsInstance(def_entry2, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry2.tag_dict) - # def_entry3 = DefinitionEntry('Def3', self.def3, False, None) - # self.assertIsInstance(def_entry3, DefinitionEntry) - # self.assertIn('Condition-variable/Blech', def_entry3.tag_dict) - # def_entry4 = DefinitionEntry('Def4', self.def4, False, None) - # self.assertIsInstance(def_entry4, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry4.tag_dict) - # def_entry3a = DefinitionEntry('Def3a', self.def3, True, None) - # self.assertIsInstance(def_entry3a, DefinitionEntry) - # self.assertIn('Condition-variable/Blech', def_entry3a.tag_dict) - # def_entry4a = DefinitionEntry('Def4a', self.def4, True, None) - # self.assertIsInstance(def_entry4a, DefinitionEntry) - # self.assertNotIn('Condition-variable/Blech', def_entry4a.tag_dict) - - # def test_check_for_definitions_placeholder(self): - # def_dict = DefinitionDict() - # original_def_count = len(def_dict.defs) - # hed_string_obj = HedString(self.placeholder_def_string) - # hed_string_obj.validate(def_dict) - # new_def_count = len(def_dict.defs) - # self.assertGreater(new_def_count, original_def_count) - # - # placeholder_invalid_def_contents = "(Age/#,Item/TestDef2/#)" - # placeholder_invalid_def_string = f"(Definition/TestDefPlaceholder/#,{placeholder_invalid_def_contents})" - # - # def test_definitions(self): - # test_strings = { - # 'noGroupTag': "(Definition/ValidDef1)", - # 'placeholderNoGroupTag': "(Definition/InvalidDef1/#)", - # 'placeholderWrongSpot': "(Definition/InvalidDef1#)", - # 'twoDefTags': f"(Definition/ValidDef1,Definition/InvalidDef2,{self.def_contents_string})", - # 'twoGroupTags': f"(Definition/InvalidDef1,{self.def_contents_string},{self.def_contents_string2})", - # 'extraOtherTags': "(Definition/InvalidDef1, InvalidContents)", - # 'duplicateDef': f"(Definition/Def1), (Definition/Def1, {self.def_contents_string})", - # 'duplicateDef2': f"(Definition/Def1), (Definition/Def1/#, {self.placeholder_def_contents})", - # 'defAlreadyTagInSchema': "(Definition/Item)", - # 'defTooManyPlaceholders': self.placeholder_invalid_def_string, - # 'invalidPlaceholder': "(Definition/InvalidDef1/InvalidPlaceholder)", - # 'invalidPlaceholderExtension': "(Definition/InvalidDef1/this-part-is-not-allowed/#)", - # 'defInGroup': "(Definition/ValidDefName, (Def/ImproperlyPlacedDef))", - # 'defExpandInGroup': "(Definition/ValidDefName, (Def-expand/ImproperlyPlacedDef, (ImproperContents)))" - # } - # expected_results = { - # 'noGroupTag': [], - # 'placeholderNoGroupTag': ErrorHandler.format_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, - # "InvalidDef1", expected_count=1, tag_list=[]), - # 'placeholderWrongSpot': ErrorHandler.format_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, - # "InvalidDef1#"), - # 'twoDefTags': ErrorHandler.format_error(DefinitionErrors.WRONG_NUMBER_GROUPS, - # "ValidDef1", ["Definition/InvalidDef2"]), - # 'twoGroupTags': ErrorHandler.format_error(DefinitionErrors.WRONG_NUMBER_GROUPS, - # "InvalidDef1", - # [self.def_contents_string, self.def_contents_string2]), - # 'extraOtherTags': ErrorHandler.format_error(DefinitionErrors.WRONG_NUMBER_GROUPS, "InvalidDef1", - # ['InvalidContents']), - # 'duplicateDef': ErrorHandler.format_error(DefinitionErrors.DUPLICATE_DEFINITION, "Def1"), - # 'duplicateDef2': ErrorHandler.format_error(DefinitionErrors.DUPLICATE_DEFINITION, "Def1"), - # # This is not an error since re-used terms are checked elsewhere. - # 'defAlreadyTagInSchema': [], - # 'defTooManyPlaceholders': ErrorHandler.format_error(DefinitionErrors.WRONG_NUMBER_PLACEHOLDER_TAGS, - # "TestDefPlaceholder", expected_count=1, - # tag_list=["Age/#", "Item/TestDef2/#"]), - # 'invalidPlaceholderExtension': ErrorHandler.format_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, - # "InvalidDef1/this-part-is-not-allowed"), - # 'invalidPlaceholder': ErrorHandler.format_error(DefinitionErrors.INVALID_DEFINITION_EXTENSION, - # "InvalidDef1/InvalidPlaceholder"), - # 'defInGroup': ErrorHandler.format_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, - # tag=HedTag("Def/ImproperlyPlacedDef"), def_name="ValidDefName"), - # 'defExpandInGroup': ErrorHandler.format_error(DefinitionErrors.DEF_TAG_IN_DEFINITION, - # tag=HedTag("Def-expand/ImproperlyPlacedDef"), - # def_name="ValidDefName") - # } - # - # self.check_def_base(test_strings, expected_results) + pass if __name__ == '__main__': diff --git a/tests/models/test_expression_parser.py b/tests/models/test_expression_parser.py index 2066e4e2a..926338d8f 100644 --- a/tests/models/test_expression_parser.py +++ b/tests/models/test_expression_parser.py @@ -1,6 +1,5 @@ import unittest from hed.models.hed_string import HedString -from hed.models.hed_string_group import HedStringGroup from hed.models.expression_parser import QueryParser import os from hed import schema @@ -37,22 +36,6 @@ def base_test(self, parse_expr, search_strings): # print(f"\t\tFound as group(s) {str([str(r) for r in result2])}") self.assertEqual(bool(result2), expected_result) - # Same test with HedStringGroup in - hed_string_comb = HedStringGroup([hed_string]) - result3 = expression.search(hed_string_comb) - # print(f"\tSearching string '{str(hed_string)}'") - # if result3: - # print(f"\t\tFound as group(s) {str([str(r) for r in result3])}") - self.assertEqual(bool(result3), expected_result) - - for r2, r3 in zip(result2, result3): - # Ensure r2 is only a HedString if r3 is. - if isinstance(r3, HedStringGroup): - self.assertIsInstance(r2, HedString) - else: - self.assertNotIsInstance(r2, HedString) - self.assertEqual(r2, r3) - def test_broken_search_strings(self): test_search_strings = [ "A and", @@ -637,29 +620,29 @@ def test_and_or(self): def test_logical_negation(self): expression = QueryParser("~a") - hed_string = HedString("A") + hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) - hed_string = HedString("B") + hed_string = HedString("B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) expression = QueryParser("~a and b") - hed_string = HedString("A") + hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) - hed_string = HedString("B") + hed_string = HedString("B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, B") + hed_string = HedString("A, B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) expression = QueryParser("~( (a or b) and c)") - hed_string = HedString("A") + hed_string = HedString("A", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("B") + hed_string = HedString("B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("C") + hed_string = HedString("C", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, B") + hed_string = HedString("A, B", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), True) - hed_string = HedString("A, C") + hed_string = HedString("A, C", self.hed_schema) self.assertEqual(bool(expression.search(hed_string)), False) def test_not_in_line(self): diff --git a/tests/models/test_hed_group.py b/tests/models/test_hed_group.py index 8bfd96d6f..96d1744c9 100644 --- a/tests/models/test_hed_group.py +++ b/tests/models/test_hed_group.py @@ -69,15 +69,17 @@ def test_find_tags_with_term(self): self.assertEqual(len(located_tags), 0) def _compare_strings(self, hed_strings): - str1 = HedString(hed_strings[0]).sort() + str1 = HedString(hed_strings[0], self.hed_schema) + str1.sort() for hed_string in hed_strings: - str2 = HedString(hed_string).sort() + str2 = HedString(hed_string, self.hed_schema) + str2.sort() self.assertEqual(str1, str2) def _compare_strings2(self, hed_strings): - str1 = HedString(hed_strings[0]) + str1 = HedString(hed_strings[0], self.hed_schema) for hed_string in hed_strings: - str2 = HedString(hed_string) + str2 = HedString(hed_string, self.hed_schema) self.assertEqual(str1.sorted(), str2.sorted()) def test_sort_and_sorted(self): diff --git a/tests/models/test_hed_string.py b/tests/models/test_hed_string.py index 46f7c750c..83ec59966 100644 --- a/tests/models/test_hed_string.py +++ b/tests/models/test_hed_string.py @@ -1,9 +1,14 @@ from hed.models import HedString import unittest from hed import load_schema_version +import copy class TestHedStrings(unittest.TestCase): + @classmethod + def setUpClass(cls): + cls.schema = load_schema_version("8.0.0") + def validator_scalar(self, test_strings, expected_results, test_function): for test_key in test_strings: test_result = test_function(test_strings[test_key]) @@ -20,6 +25,7 @@ def validator_list(self, test_strings, expected_results, test_function): class TestHedString(unittest.TestCase): @classmethod def setUpClass(cls): + cls.schema = load_schema_version("8.0.0") pass def test_constructor(self): @@ -44,7 +50,7 @@ def test_constructor(self): # Just make sure it doesn't crash while parsing super invalid strings. for name, string in test_strings.items(): - hed_string = HedString(string) + hed_string = HedString(string, self.schema) self.assertEqual(bool(hed_string), expected_result[name]) if bool(hed_string): @@ -55,12 +61,12 @@ def test_constructor(self): class HedTagLists(TestHedStrings): def test_type(self): hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' - result = HedString.split_into_groups(hed_string) + result = HedString.split_into_groups(hed_string, self.schema) self.assertIsInstance(result, list) def test_top_level_tags(self): hed_string = 'Event/Category/Experimental stimulus,Item/Object/Vehicle/Train,Attribute/Visual/Color/Purple' - result = HedString.split_into_groups(hed_string) + result = HedString.split_into_groups(hed_string, self.schema) tags_as_strings = [str(tag) for tag in result] self.assertCountEqual(tags_as_strings, ['Event/Category/Experimental stimulus', 'Item/Object/Vehicle/Train', 'Attribute/Visual/Color/Purple']) @@ -68,7 +74,7 @@ def test_top_level_tags(self): def test_group_tags(self): hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' - string_obj = HedString(hed_string) + string_obj = HedString(hed_string, self.schema) tags_as_strings = [str(tag) for tag in string_obj.children] self.assertCountEqual(tags_as_strings, ['/Action/Reach/To touch', @@ -78,10 +84,10 @@ def test_group_tags(self): def test_square_brackets_in_string(self): # just verifying this parses, square brackets do not validate hed_string = '[test_ref], Event/Sensory-event, Participant, ([test_ref2], Event)' - string_obj = HedString(hed_string) + string_obj = HedString(hed_string, self.schema) tags_as_strings = [str(tag) for tag in string_obj.children] self.assertCountEqual(tags_as_strings, - ['[test_ref]', 'Event/Sensory-event', 'Participant', '([test_ref2],Event)']) + ['[test_ref]', 'Sensory-event', 'Participant', '([test_ref2],Event)']) # Potentially restore some similar behavior later if desired. # We no longer automatically remove things like quotes. @@ -118,7 +124,7 @@ def test_blanks(self): } def test_function(string): - return [str(child) for child in HedString.split_into_groups(string)] + return [str(child) for child in HedString.split_into_groups(string, self.schema)] self.validator_list(test_strings, expected_results, test_function) @@ -127,7 +133,7 @@ class ProcessedHedTags(TestHedStrings): def test_parsed_tags(self): hed_string = '/Action/Reach/To touch,(/Attribute/Object side/Left,/Participant/Effect/Body part/Arm),' \ '/Attribute/Location/Screen/Top/70 px,/Attribute/Location/Screen/Left/23 px ' - parsed_string = HedString(hed_string) + parsed_string = HedString(hed_string, self.schema) self.assertCountEqual([str(tag) for tag in parsed_string.get_all_tags()], [ '/Action/Reach/To touch', '/Attribute/Object side/Left', @@ -204,3 +210,100 @@ def test_shrink_defs(self): hed_string.shrink_defs() self.assertEqual(str(hed_string), expected_results[key]) + +class TestFromHedStrings(unittest.TestCase): + def setUp(self): + self.schema = load_schema_version("8.1.0") + self.hed_strings = [ + HedString('Event', self.schema), + HedString('Action', self.schema), + HedString('Age/20', self.schema), + HedString('Item', self.schema), + ] + + def test_from_hed_strings(self): + combined_hed_string = HedString.from_hed_strings(self.hed_strings) + + # Test that the combined hed string is as expected + self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Item') + + # Test that the schema of the combined hed string is the same as the first hed string + self.assertEqual(combined_hed_string._schema, self.schema) + + # Test that the contents of the combined hed string is the concatenation of the contents of all hed strings + expected_contents = [child for hed_string in self.hed_strings for child in hed_string.children] + self.assertEqual(combined_hed_string.children, expected_contents) + + # Test that the _from_strings attribute of the combined hed string is the list of original hed strings + self.assertEqual(combined_hed_string._from_strings, self.hed_strings) + + def test_empty_hed_strings_list(self): + with self.assertRaises(TypeError): + HedString.from_hed_strings([]) + + def test_none_hed_strings_list(self): + with self.assertRaises(TypeError): + HedString.from_hed_strings(None) + + def test_complex_hed_strings(self): + complex_hed_strings = [ + HedString('Event,Action', self.schema), + HedString('Age/20,Hand', self.schema), + HedString('Item,(Leg, Nose)', self.schema), + ] + + combined_hed_string = HedString.from_hed_strings(complex_hed_strings) + + # Test that the combined hed string is as expected + self.assertEqual(combined_hed_string._hed_string, 'Event,Action,Age/20,Hand,Item,(Leg, Nose)') + + # Test that the schema of the combined hed string is the same as the first hed string + self.assertEqual(combined_hed_string._schema, self.schema) + + # Test that the contents of the combined hed string is the concatenation of the contents of all hed strings + expected_contents = [child for hed_string in complex_hed_strings for child in hed_string.children] + self.assertEqual(combined_hed_string.children, expected_contents) + + # Test that the _from_strings attribute of the combined hed string is the list of original hed strings + self.assertEqual(combined_hed_string._from_strings, complex_hed_strings) + + def _verify_copied_string(self, original_hed_string): + # Make a deepcopy of the original HedString + copied_hed_string = copy.deepcopy(original_hed_string) + + # The copied HedString should not be the same object as the original + self.assertNotEqual(id(original_hed_string), id(copied_hed_string)) + + # The copied HedString should have the same _hed_string as the original + self.assertEqual(copied_hed_string._hed_string, original_hed_string._hed_string) + + # The _children attribute of copied HedString should not be the same object as the original + self.assertNotEqual(id(original_hed_string._children), id(copied_hed_string._children)) + + # The _children attribute of copied HedString should have the same contents as the original + self.assertEqual(copied_hed_string._children, original_hed_string._children) + + # The parent of each child in copied_hed_string._children should point to copied_hed_string + for child in copied_hed_string._children: + self.assertEqual(child._parent, copied_hed_string) + + # The _original_children and _from_strings attributes should also be deepcopied + self.assertNotEqual(id(original_hed_string._original_children), id(copied_hed_string._original_children)) + self.assertEqual(copied_hed_string._original_children, original_hed_string._original_children) + if original_hed_string._from_strings: + self.assertNotEqual(id(original_hed_string._from_strings), id(copied_hed_string._from_strings)) + self.assertEqual(copied_hed_string._from_strings, original_hed_string._from_strings) + + def test_deepcopy(self): + original_hed_string = HedString('Event,Action', self.schema) + + self._verify_copied_string(original_hed_string) + complex_hed_strings = [ + HedString('Event,Action', self.schema), + HedString('Age/20,Hand', self.schema), + HedString('Item,(Leg, Nose)', self.schema), + ] + + combined_hed_string = HedString.from_hed_strings(complex_hed_strings) + + self._verify_copied_string(combined_hed_string) \ No newline at end of file diff --git a/tests/models/test_hed_string_comb.py b/tests/models/test_hed_string_comb.py deleted file mode 100644 index 934a3ffd6..000000000 --- a/tests/models/test_hed_string_comb.py +++ /dev/null @@ -1,67 +0,0 @@ -import unittest -import os - -from hed import schema -from hed.models import HedString -from hed.models import HedTag -from hed.models.hed_string_group import HedStringGroup -import copy - - -class Test(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.base_data_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)), '../data/') - hed_xml_file = os.path.join(cls.base_data_dir, "schema_tests/HED8.0.0t.xml") - cls.hed_schema = schema.load_schema(hed_xml_file) - - def test_remove_groups(self): - string1 = HedString("Item/Object", self.hed_schema) - string2 = HedString("Event, (Event, Square)", self.hed_schema) - comb_string = HedStringGroup([string1, string2]) - self.assertEqual(len(comb_string.get_all_tags()), 4) - self.assertEqual(len(string1.get_all_tags()), 1) - self.assertEqual(len(string2.get_all_tags()), 3) - tags = comb_string.find_tags(["Object".lower()], include_groups=0) - comb_string.remove(tags) - self.assertEqual(len(string1.get_all_tags()), 0) - self.assertEqual(len(string2.get_all_tags()), 3) - self.assertEqual(len(comb_string.get_all_tags()), 3) - - tags = comb_string.find_tags(["Event".lower()], recursive=True, include_groups=0) - comb_string.remove(tags) - self.assertEqual(len(string2.get_all_tags()), 1) - self.assertEqual(len(comb_string.get_all_tags()), 1) - - tags = comb_string.find_tags(["Square".lower()], recursive=True, include_groups=0) - comb_string.remove(tags) - self.assertEqual(len(string2.get_all_tags()), 0) - self.assertEqual(len(comb_string.get_all_tags()), 0) - - def test_replace(self): - string1 = HedString("Item/Object", self.hed_schema) - string2 = HedString("Event, (Event, Square)", self.hed_schema) - new_contents = HedTag("Def/TestTag", hed_schema=self.hed_schema) - comb_string = HedStringGroup([string1, string2]) - self.assertEqual(len(string1.get_all_tags()), 1) - self.assertEqual(len(string2.get_all_tags()), 3) - self.assertEqual(len(comb_string.get_all_tags()), 4) - tags = comb_string.find_tags(["Object".lower()], include_groups=0) - comb_string.replace(tags[0], copy.copy(new_contents)) - - self.assertEqual(len(string1.get_all_tags()), 1) - self.assertEqual(len(string2.get_all_tags()), 3) - self.assertEqual(len(comb_string.get_all_tags()), 4) - - tags = comb_string.find_tags(["Event".lower()], include_groups=0) - comb_string.replace(tags[0], copy.copy(new_contents)) - self.assertEqual(len(string1.get_all_tags()), 1) - self.assertEqual(len(string2.get_all_tags()), 3) - self.assertEqual(len(comb_string.get_all_tags()), 4) - - tag_group = comb_string.find_tags(["Event".lower()], recursive=True, include_groups=2) - tag, group = tag_group[0][0], tag_group[0][1] - group.replace(tag, copy.copy(new_contents)) - self.assertEqual(len(string1.get_all_tags()), 1) - self.assertEqual(len(string2.get_all_tags()), 3) - self.assertEqual(len(comb_string.get_all_tags()), 4) diff --git a/tests/models/test_tabular_input.py b/tests/models/test_tabular_input.py index 95eb527f0..7f5282e47 100644 --- a/tests/models/test_tabular_input.py +++ b/tests/models/test_tabular_input.py @@ -82,7 +82,6 @@ def test_validate_file_warnings(self): issues2 = self.sidecar1.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) input_file2 = TabularInput(self.events_path, sidecar=self.sidecar2) issues2a = input_file2.validate(hed_schema=self.hed_schema, error_handler=ErrorHandler(False)) - breakHere = 3 def test_invalid_file(self): for invalid_input in self.invalid_inputs: diff --git a/tests/schema/test_base_tag.py b/tests/schema/test_base_tag.py index 4fa325289..ba477aecb 100644 --- a/tests/schema/test_base_tag.py +++ b/tests/schema/test_base_tag.py @@ -17,8 +17,8 @@ def setUpClass(cls): def tag_form_base(self, test_strings, expected_results, expected_errors): for test_key in test_strings: - test_string_obj = HedString(test_strings[test_key]) - test_errors = test_string_obj.convert_to_canonical_forms(hed_schema=self.hed_schema) + test_string_obj = HedString(test_strings[test_key], self.hed_schema) + test_errors = test_string_obj._calculate_to_canonical_forms(hed_schema=self.hed_schema) expected_error = expected_errors[test_key] expected_result = expected_results[test_key] for tag in test_string_obj.tags(): @@ -27,8 +27,8 @@ def tag_form_base(self, test_strings, expected_results, expected_errors): def tag_form_org_base(self, test_strings, expected_results, expected_errors): for test_key in test_strings: - test_string_obj = HedString(test_strings[test_key]) - test_errors = test_string_obj.convert_to_canonical_forms(hed_schema=self.hed_schema) + test_string_obj = HedString(test_strings[test_key], self.hed_schema) + test_errors = test_string_obj._calculate_to_canonical_forms(hed_schema=self.hed_schema) expected_error = expected_errors[test_key] expected_result = expected_results[test_key] for tag in test_string_obj.tags(): diff --git a/tests/schema/test_convert_tags.py b/tests/schema/test_convert_tags.py index c015facb6..9ca838c8c 100644 --- a/tests/schema/test_convert_tags.py +++ b/tests/schema/test_convert_tags.py @@ -12,10 +12,10 @@ class TestTagFormat(TestHedBase): class TestConvertTag(TestTagFormat): def converter_base(self, test_strings, expected_results, expected_errors, convert_to_short=True): for test_key in test_strings: - test_string_obj = HedString(test_strings[test_key]) + test_string_obj = HedString(test_strings[test_key], self.hed_schema) error_handler = ErrorHandler() error_handler.push_error_context(ErrorContext.HED_STRING, test_string_obj) - test_issues = test_string_obj.convert_to_canonical_forms(self.hed_schema) + test_issues = test_string_obj._calculate_to_canonical_forms(self.hed_schema) if convert_to_short: string_result = test_string_obj.get_as_short() else: diff --git a/tests/schema/test_hed_schema.py b/tests/schema/test_hed_schema.py index f7a5c29c2..4c30e1c52 100644 --- a/tests/schema/test_hed_schema.py +++ b/tests/schema/test_hed_schema.py @@ -88,10 +88,6 @@ def test_get_all_tags(self): self.assertTrue(isinstance(terms, list)) self.assertTrue(len(terms) > 0) - def test_find_duplicate_tags(self): - dupe_tags = self.hed_schema_3g.find_duplicate_tags() - self.assertEqual(len(dupe_tags), 0) - def test_get_desc_dict(self): desc_dict = self.hed_schema_3g.get_desc_iter() self.assertEqual(len(list(desc_dict)), 1117) @@ -110,8 +106,7 @@ def test_get_tag_description(self): self.assertEqual(desc, None) def test_get_all_tag_attributes(self): - test_string = HedString("Jerk-rate/#") - test_string.convert_to_canonical_forms(self.hed_schema_3g) + test_string = HedString("Jerk-rate/#", self.hed_schema_3g) tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string) expected_props = { "takesValue": "true", @@ -125,8 +120,7 @@ def test_get_all_tag_attributes(self): } self.assertCountEqual(tag_props, expected_props) - test_string = HedString("Statistical-value") - test_string.convert_to_canonical_forms(self.hed_schema_3g) + test_string = HedString("Statistical-value", self.hed_schema_3g) tag_props = self.hed_schema_3g.get_all_tag_attributes(test_string) expected_props = { HedKey.ExtensionAllowed: "true", @@ -140,7 +134,7 @@ def test_get_hed_xml_version(self): self.assertEqual(get_hed_xml_version(self.hed_xml_3g), "8.0.0") def test_has_duplicate_tags(self): - self.assertFalse(self.hed_schema_3g.has_duplicate_tags) + self.assertFalse(self.hed_schema_3g._has_duplicate_tags) def test_short_tag_mapping(self): self.assertEqual(len(self.hed_schema_3g.all_tags.keys()), 1110) diff --git a/tests/schema/test_hed_schema_io.py b/tests/schema/test_hed_schema_io.py index b341f3a3d..87c2416ae 100644 --- a/tests/schema/test_hed_schema_io.py +++ b/tests/schema/test_hed_schema_io.py @@ -57,49 +57,49 @@ def test_load_schema_version(self): ver1 = "8.0.0" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertEqual(schemas1.version, "8.0.0", "load_schema_version has the right version") + self.assertEqual(schemas1.version_number, "8.0.0", "load_schema_version has the right version") self.assertEqual(schemas1.library, "", "load_schema_version standard schema has no library") ver2 = "base:8.0.0" schemas2 = load_schema_version(ver2) self.assertIsInstance(schemas2, HedSchema, "load_schema_version returns HedSchema version+namespace") - self.assertEqual(schemas2.version, "8.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas2.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") ver3 = ["base:8.0.0"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") - self.assertEqual(schemas3.version, "8.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.version_number, "8.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") ver3 = ["base:"] schemas3 = load_schema_version(ver3) self.assertIsInstance(schemas3, HedSchema, "load_schema_version returns HedSchema version+namespace") - self.assertTrue(schemas3.version, "load_schema_version has the right version with namespace") + self.assertTrue(schemas3.version_number, "load_schema_version has the right version with namespace") self.assertEqual(schemas3._namespace, "base:", "load_schema_version has the right version with namespace") def test_load_schema_version_libraries(self): ver1 = "score_1.0.0" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertEqual(schemas1.version, "1.0.0", "load_schema_version has the right version") + self.assertEqual(schemas1.version_number, "1.0.0", "load_schema_version has the right version") self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") - self.assertEqual(schemas1.get_formatted_version(), "score_1.0.0", + self.assertEqual(schemas1.get_formatted_version(), '"score_1.0.0"', "load_schema_version gives correct version_string with single library no namespace") ver1 = "score_" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version, "load_schema_version has the right version") + self.assertTrue(schemas1.version_number, "load_schema_version has the right version") self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") ver1 = "score" schemas1 = load_schema_version(ver1) self.assertIsInstance(schemas1, HedSchema, "load_schema_version returns a HedSchema if a string version") - self.assertTrue(schemas1.version, "load_schema_version has the right version") + self.assertTrue(schemas1.version_number, "load_schema_version has the right version") self.assertEqual(schemas1.library, "score", "load_schema_version works with single library no namespace") ver2 = "base:score_1.0.0" schemas2 = load_schema_version(ver2) self.assertIsInstance(schemas2, HedSchema, "load_schema_version returns HedSchema version+namespace") - self.assertEqual(schemas2.version, "1.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas2.version_number, "1.0.0", "load_schema_version has the right version with namespace") self.assertEqual(schemas2._namespace, "base:", "load_schema_version has the right version with namespace") - self.assertEqual(schemas2.get_formatted_version(as_string=True), "base:score_1.0.0", + self.assertEqual(schemas2.get_formatted_version(), '"base:score_1.0.0"', "load_schema_version gives correct version_string with single library with namespace") ver3 = ["8.0.0", "sc:score_1.0.0"] schemas3 = load_schema_version(ver3) @@ -107,41 +107,42 @@ def test_load_schema_version_libraries(self): self.assertIsInstance(schemas3._schemas, dict, "load_schema_version group keeps dictionary of hed versions") self.assertEqual(len(schemas3._schemas), 2, "load_schema_version group dictionary is right length") s = schemas3._schemas[""] - self.assertEqual(s.version, "8.0.0", "load_schema_version has the right version with namespace") - self.assertEqual(schemas3.get_formatted_version(as_string=True), '["8.0.0", "sc:score_1.0.0"]', + self.assertEqual(s.version_number, "8.0.0", "load_schema_version has the right version with namespace") + self.assertEqual(schemas3.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', "load_schema_version gives correct version_string with single library with namespace") - formatted_list = schemas3.get_formatted_version(as_string=False) - self.assertEqual(len(formatted_list), 2, - "load_schema_version gives correct version_string with single library with namespace") - ver4 = ["ts:8.0.0", "sc:score_1.0.0"] - schemas4 = load_schema_version(ver4) + formatted_list = schemas3.get_formatted_version() + schemas4 = load_schema_version(formatted_list) self.assertIsInstance(schemas4, HedSchemaGroup, "load_schema_version returns HedSchema version+namespace") self.assertIsInstance(schemas4._schemas, dict, "load_schema_version group keeps dictionary of hed versions") self.assertEqual(len(schemas4._schemas), 2, "load_schema_version group dictionary is right length") - self.assertEqual(schemas4.get_formatted_version(), '["ts:8.0.0", "sc:score_1.0.0"]', + self.assertEqual(schemas4.get_formatted_version(), '["8.0.0", "sc:score_1.0.0"]', "load_schema_version gives correct version_string with multiple prefixes") - s = schemas4._schemas["ts:"] - self.assertEqual(s.version, "8.0.0", "load_schema_version has the right version with namespace") + s = schemas4._schemas["sc:"] + self.assertEqual(s.version_number, "1.0.0", "load_schema_version has the right version with namespace") with self.assertRaises(KeyError) as context: - schemas4._schemas[""] - self.assertEqual(context.exception.args[0], '') + schemas4._schemas["ts:"] + self.assertEqual(context.exception.args[0], 'ts:') + + with self.assertRaises(HedFileError) as context: + load_schema_version("[Malformed,,json]") + def test_load_schema_version_empty(self): schemas = load_schema_version("") self.assertIsInstance(schemas, HedSchema, "load_schema_version for empty string returns latest version") - self.assertTrue(schemas.version, "load_schema_version for empty string has a version") + self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") schemas = load_schema_version(None) self.assertIsInstance(schemas, HedSchema, "load_schema_version for None returns latest version") - self.assertTrue(schemas.version, "load_schema_version for empty string has a version") + self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") schemas = load_schema_version([""]) self.assertIsInstance(schemas, HedSchema, "load_schema_version list with blank entry returns latest version") - self.assertTrue(schemas.version, "load_schema_version for empty string has a version") + self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") schemas = load_schema_version([]) self.assertIsInstance(schemas, HedSchema, "load_schema_version list with blank entry returns latest version") - self.assertTrue(schemas.version, "load_schema_version for empty string has a version") + self.assertTrue(schemas.version_number, "load_schema_version for empty string has a version") self.assertFalse(schemas.library, "load_schema_version for empty string is not a library") class TestHedSchemaMerging(unittest.TestCase): diff --git a/tests/tools/analysis/test_analysis_util_convert.py b/tests/tools/analysis/test_analysis_util_convert.py index 57111065a..7150b8b58 100644 --- a/tests/tools/analysis/test_analysis_util_convert.py +++ b/tests/tools/analysis/test_analysis_util_convert.py @@ -19,7 +19,7 @@ def test_convert_list(self): pass def test_convert_hed_tag(self): - tag1 = HedTag('Label/Cond1') + tag1 = HedTag('Label/Cond1', self.hed_schema) str1 = hed_to_str(tag1) self.assertIsInstance(str1, str) self.assertEqual(str1, 'Label/Cond1') @@ -28,7 +28,6 @@ def test_convert_hed_tag(self): self.assertIsInstance(str2, str) self.assertEqual(str2, 'Label/Cond1') tag3 = HedTag('Label/Cond1', hed_schema=self.hed_schema) - tag3.convert_to_canonical_forms(tag3._schema) str3 = hed_to_str(tag3) self.assertIsInstance(str3, str) self.assertEqual(str3, 'Label/Cond1') @@ -47,7 +46,7 @@ def test_hed_to_str_other(self): def test_hed_to_str_obj(self): - str_obj1 = HedString('Label/Cond1') + str_obj1 = HedString('Label/Cond1', self.hed_schema) str1 = hed_to_str(str_obj1) self.assertIsInstance(str1, str) self.assertEqual(str1, 'Label/Cond1') @@ -56,7 +55,6 @@ def test_hed_to_str_obj(self): self.assertIsInstance(str2, str) self.assertEqual(str2, 'Label/Cond1') str_obj3 = HedString('Label/Cond1', hed_schema=self.hed_schema) - str_obj3.convert_to_canonical_forms(self.hed_schema) str3 = hed_to_str(str_obj3) self.assertIsInstance(str3, str) self.assertEqual(str3, 'Label/Cond1') diff --git a/tests/tools/analysis/test_hed_type_definitions.py b/tests/tools/analysis/test_hed_type_definitions.py index 15cbedce2..7388d1228 100644 --- a/tests/tools/analysis/test_hed_type_definitions.py +++ b/tests/tools/analysis/test_hed_type_definitions.py @@ -92,7 +92,7 @@ def test_get_def_names(self): no_value=False) self.assertEqual(len(d), 3, "get_def_names returns right number of items if multiple defs") self.assertEqual(d[0], 'cond3/6', "get_def_names returns the correct item if multiple def") - e = def_man.get_def_names(HedString('((Red, Blue, (Green), Black))')) + e = def_man.get_def_names(HedString('((Red, Blue, (Green), Black))', hed_schema=self.schema)) self.assertFalse(e, "get_def_names returns no items if no defs") def test_split_name(self): diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py index 6cfddbd90..3e6843b25 100644 --- a/tests/tools/remodeling/operations/test_summarize_definitions_op.py +++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py @@ -90,6 +90,22 @@ def test_summary_errors(self): self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) + def test_ambiguous_def_errors(self): + dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0']) + parms = json.loads(self.json_parms) + sum_op = SummarizeDefinitionsOp(parms) + df = pd.DataFrame({"HED": [ + "(Def-expand/B2/3, (Action/3, Collection/animals, Acceleration/3))", + ]}) + df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path) + self.assertIn(sum_op.summary_name, dispatch.summary_dicts) + self.assertIsInstance(dispatch.summary_dicts[sum_op.summary_name], DefinitionSummary) + # print(str(dispatch.summary_dicts[sum_op.summary_name].get_text_summary()['Dataset'])) + cont = dispatch.summary_dicts + context = cont.get("get_definition_summary", None) + self.assertIsInstance(context, DefinitionSummary, "get_summary testing DefinitionSummary") + summary1a = context.get_summary() + if __name__ == '__main__': unittest.main() diff --git a/tests/validator/test_def_validator.py b/tests/validator/test_def_validator.py index bbaf3eb58..d6110ec65 100644 --- a/tests/validator/test_def_validator.py +++ b/tests/validator/test_def_validator.py @@ -250,29 +250,6 @@ def test_expand_def_tags_placeholder(self): remove_definitions=True, basic_definition_string=self.placeholder_definition_string) - # todo: finish updating these - # # special case test - # def test_changing_tag_then_def_mapping(self): - # def_dict = DefinitionDict() - # def_string = HedString(self.basic_definition_string) - # def_string.convert_to_canonical_forms(None) - # def_dict.check_for_definitions(def_string) - # def_mapper = DefMapper(def_dict) - # validator = HedValidator(self.hed_schema) - # hed_ops = [validator, def_mapper] - # - # test_string = HedString(self.label_def_string) - # tag = test_string.children[0] - # tag.tag = "Organizational-property/" + str(tag) - # def_issues = test_string.validate(hed_ops, expand_defs=True) - # self.assertFalse(def_issues) - # self.assertEqual(test_string.get_as_short(), f"{self.expanded_def_string}") - # - # test_string = HedString(self.label_def_string) - # tag = test_string.children[0] - # tag.tag = "Organizational-property22/" + str(tag) - # def_issues = test_string.validate(hed_ops, expand_defs=True) - # self.assertTrue(def_issues) diff --git a/tests/validator/test_hed_validator.py b/tests/validator/test_hed_validator.py index 451241377..0e3bcdfab 100644 --- a/tests/validator/test_hed_validator.py +++ b/tests/validator/test_hed_validator.py @@ -32,7 +32,7 @@ def setUpClass(cls): SpreadsheetInput(cls.hed_filepath_major_errors_multi_column, tag_columns=[1, 2]) def test__validate_input(self): - test_string_obj = HedString(self.base_hed_input) + test_string_obj = HedString(self.base_hed_input, self.hed_schema) validation_issues = test_string_obj.validate(self.hed_schema) self.assertIsInstance(validation_issues, list) @@ -170,14 +170,14 @@ def test_error_spans_from_file_and_missing_required_column(self): # todo: move this test somewhere more appropriate def test_org_tag_missing(self): - test_string_obj = HedString("Event, Item/NotItem") + test_string_obj = HedString("Event, Item/NotItem", self.hed_schema) removed_tag = test_string_obj.tags()[0] test_string_obj.remove([removed_tag]) from hed import HedTag source_span = test_string_obj._get_org_span(removed_tag) self.assertEqual(source_span, (0, 5)) - source_span = test_string_obj._get_org_span(HedTag("Event")) + source_span = test_string_obj._get_org_span(HedTag("Event", self.hed_schema)) self.assertEqual(source_span, (None, None)) diff --git a/tests/validator/test_onset_validator.py b/tests/validator/test_onset_validator.py index b1acb3962..5e20d8ad4 100644 --- a/tests/validator/test_onset_validator.py +++ b/tests/validator/test_onset_validator.py @@ -65,7 +65,7 @@ def _test_issues_base(self, test_strings, test_issues, test_context, placeholder def _test_issues_no_context(self, test_strings, test_issues): hed_validator = HedValidator(self.hed_schema, self.def_dict_both) for string, expected_params in zip(test_strings, test_issues): - test_string = HedString(string) + test_string = HedString(string, self.hed_schema) error_handler = ErrorHandler(check_for_warnings=False) error_handler.push_error_context(ErrorContext.HED_STRING, test_string) onset_issues = hed_validator.validate(test_string, False) diff --git a/tests/validator/test_tag_validator.py b/tests/validator/test_tag_validator.py index 471075553..3c4c095a1 100644 --- a/tests/validator/test_tag_validator.py +++ b/tests/validator/test_tag_validator.py @@ -388,11 +388,11 @@ def test_no_duplicates(self): 'legalDuplicate': [], 'noDuplicate': [], 'duplicateGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Sensory-event, Man-made-object/VehicleTrain)")), + group=HedString("(Sensory-event, Man-made-object/VehicleTrain)", self.hed_schema)), 'duplicateSubGroup': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("(Event,(Sensory-event,Man-made-object/VehicleTrain))")), + group=HedString("(Event,(Sensory-event,Man-made-object/VehicleTrain))", self.hed_schema)), 'duplicateSubGroupF': self.format_error(ValidationErrors.HED_TAG_REPEATED_GROUP, - group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)")), + group=HedString("((Sensory-event,Man-made-object/VehicleTrain),Event)", self.hed_schema)), } self.validator_semantic(test_strings, expected_results, expected_issues, False) diff --git a/tests/validator/test_tag_validator_base.py b/tests/validator/test_tag_validator_base.py index f50c41af4..690ed79bd 100644 --- a/tests/validator/test_tag_validator_base.py +++ b/tests/validator/test_tag_validator_base.py @@ -66,20 +66,18 @@ class TestValidatorBase(TestHedBase): def setUpClass(cls): super().setUpClass() cls.error_handler = error_reporter.ErrorHandler() - # cls.syntactic_hed_input_reader = HedValidator(hed_schema=None) - # cls.syntactic_tag_validator = cls.syntactic_hed_input_reader._tag_validator cls.semantic_hed_input_reader = HedValidator(hed_schema=cls.hed_schema) cls.semantic_tag_validator = cls.semantic_hed_input_reader._tag_validator def validator_base(self, test_strings, expected_results, expected_issues, test_function, - hed_schema=None, check_for_warnings=False): + hed_schema, check_for_warnings=False): for test_key in test_strings: - hed_string_obj = HedString(test_strings[test_key]) + hed_string_obj = HedString(test_strings[test_key], self.hed_schema) error_handler = ErrorHandler(check_for_warnings=check_for_warnings) error_handler.push_error_context(ErrorContext.HED_STRING, hed_string_obj) test_issues = [] if self.compute_forms: - test_issues += hed_string_obj.convert_to_canonical_forms(hed_schema) + test_issues += hed_string_obj._calculate_to_canonical_forms(hed_schema) if not test_issues: test_issues += test_function(hed_string_obj) expected_params = expected_issues[test_key]