Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HedStrings now always require a Schema #720

Merged
merged 2 commits into from
Jul 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 0 additions & 10 deletions hed/errors/error_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
ErrorContext.ROW,
ErrorContext.COLUMN,
ErrorContext.LINE,
# ErrorContext.HED_STRING, # temporarily disable hed string sort(maybe perm, not sure it's needed)
ErrorContext.SCHEMA_SECTION,
ErrorContext.SCHEMA_TAG,
ErrorContext.SCHEMA_ATTRIBUTE,
Expand All @@ -33,10 +32,6 @@
ErrorContext.ROW
]

hed_string_sort_list = [
ErrorContext.HED_STRING
]

def _register_error_function(error_type, wrapper_func):
if error_type in error_functions:
raise KeyError(f"{error_type} defined more than once.")
Expand Down Expand Up @@ -191,11 +186,8 @@ def push_error_context(self, context_type, context):

"""
if context is None:
from hed import HedString
if context_type in int_sort_list:
context = 0
elif context_type in hed_string_sort_list:
context = HedString("")
else:
context = ""
self.error_context.append((context_type, context))
Expand Down Expand Up @@ -430,8 +422,6 @@ def _get_keys(d):
for key in default_sort_list:
if key in int_sort_list:
result.append(d.get(key, -1))
elif key in hed_string_sort_list:
result.append(d.get(key, HedString("")))
else:
result.append(d.get(key, ""))
return tuple(result)
Expand Down
1 change: 0 additions & 1 deletion hed/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from .hed_group import HedGroup
from .spreadsheet_input import SpreadsheetInput
from .hed_string import HedString
from .hed_string_group import HedStringGroup
from .hed_tag import HedTag
from .sidecar import Sidecar
from .tabular_input import TabularInput
Expand Down
2 changes: 1 addition & 1 deletion hed/models/base_input.py
Original file line number Diff line number Diff line change
Expand Up @@ -493,7 +493,7 @@ def combine_dataframe(dataframe):
)
return dataframe

def get_def_dict(self, hed_schema=None, extra_def_dicts=None):
def get_def_dict(self, hed_schema, extra_def_dicts=None):
""" Returns the definition dict for this file

Note: Baseclass implementation returns just extra_def_dicts.
Expand Down
4 changes: 2 additions & 2 deletions hed/models/column_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,11 +378,11 @@ def _finalize_mapping(self):
def _remove_from_list(list_to_alter, to_remove):
return [item for item in list_to_alter if item not in to_remove]

def get_def_dict(self, hed_schema=None, extra_def_dicts=None):
def get_def_dict(self, hed_schema, extra_def_dicts=None):
""" Return def dicts from every column description.

Parameters:
hed_schema (Schema or None): A HED schema object to use for extracting definitions.
hed_schema (Schema): A HED schema object to use for extracting definitions.
extra_def_dicts (list, DefinitionDict, or None): Extra dicts to add to the list.

Returns:
Expand Down
3 changes: 0 additions & 3 deletions hed/models/def_expand_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,4 @@ def get_ambiguous_group(ambiguous_def):
Returns:
HedGroup: the ambiguous definition with known placeholders filled in
"""
if not ambiguous_def:
# mostly to not crash, this shouldn't happen.
return HedString("")
return ambiguous_def.get_group()
2 changes: 1 addition & 1 deletion hed/models/definition_dict.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def add_definitions(self, def_dicts, hed_schema=None):
for definition in def_dict:
self.check_for_definitions(HedString(definition, hed_schema))
else:
raise TypeError("Invalid type '{type(def_dict)}' passed to DefinitionDict")
raise TypeError(f"Invalid type '{type(def_dict)}' passed to DefinitionDict")

def _add_definition(self, def_tag, def_value):
if def_tag in self.defs:
Expand Down
3 changes: 2 additions & 1 deletion hed/models/definition_entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ def __init__(self, name, contents, takes_value, source_context):
"""
self.name = name
if contents:
contents = contents.copy().sort()
contents = contents.copy()
contents.sort()
self.contents = contents
self.takes_value = takes_value
self.source_context = source_context
Expand Down
7 changes: 1 addition & 6 deletions hed/models/hed_group.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,8 @@ def copy(self):
return return_copy

def sort(self):
""" Sort the tags and groups in this HedString in a consistent order.

Returns:
self
"""
""" Sort the tags and groups in this HedString in a consistent order."""
self.sorted(update_self=True)
return self

def sorted(self, update_self=False):
""" Returns a sorted copy of this hed group as a list of its children
Expand Down
130 changes: 75 additions & 55 deletions hed/models/hed_string.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
This module is used to split tags in a HED string.
"""
import copy
from hed.models.hed_group import HedGroup
from hed.models.hed_tag import HedTag
from hed.models.model_constants import DefTagNames
Expand All @@ -12,14 +13,15 @@ class HedString(HedGroup):
OPENING_GROUP_CHARACTER = '('
CLOSING_GROUP_CHARACTER = ')'

def __init__(self, hed_string, hed_schema=None, def_dict=None, _contents=None):
def __init__(self, hed_string, hed_schema, def_dict=None, _contents=None):
""" Constructor for the HedString class.

Parameters:
hed_string (str): A HED string consisting of tags and tag groups.
hed_schema (HedSchema or None): The schema to use to identify tags. Can be passed later.
hed_schema (HedSchema): The schema to use to identify tags.
def_dict(DefinitionDict or None): The def dict to use to identify def/def expand tags.
_contents ([HedGroup and/or HedTag] or None): Create a HedString from this exact list of children.
Does not make a copy.
Does not make a copy.
Notes:
- The HedString object parses its component tags and groups into a tree-like structure.

Expand All @@ -33,27 +35,36 @@ def __init__(self, hed_string, hed_schema=None, def_dict=None, _contents=None):
except ValueError:
contents = []
super().__init__(hed_string, contents=contents, startpos=0, endpos=len(hed_string))
self._schema = hed_schema
self._from_strings = None

@classmethod
def from_hed_strings(cls, contents):
def from_hed_strings(cls, hed_strings):
""" Factory for creating HedStrings via combination.

Parameters:
contents (list or None): A list of HedString objects to combine. This takes ownership of their children.
hed_strings (list or None): A list of HedString objects to combine.
This takes ownership of their children.

Returns:
new_string(HedString): The newly combined HedString
"""
result = HedString.__new__(HedString)
hed_string = "".join([group._hed_string for group in contents])
contents = [child for sub_string in contents for child in sub_string.children]
result.__init__(hed_string=hed_string, _contents=contents)
return result
if not hed_strings:
raise TypeError("Passed an empty list to from_hed_strings")
new_string = HedString.__new__(HedString)
hed_string = ",".join([group._hed_string for group in hed_strings])
contents = [child for sub_string in hed_strings for child in sub_string.children]
first_schema = hed_strings[0]._schema
new_string.__init__(hed_string=hed_string, _contents=contents, hed_schema=first_schema)
new_string._from_strings = hed_strings
return new_string

@property
def is_group(self):
""" Always False since the underlying string is not a group with parentheses. """
return False

def convert_to_canonical_forms(self, hed_schema):
def _calculate_to_canonical_forms(self, hed_schema):
""" Identify all tags using the given schema.

Parameters:
Expand All @@ -65,10 +76,39 @@ def convert_to_canonical_forms(self, hed_schema):
"""
validation_issues = []
for tag in self.get_all_tags():
validation_issues += tag.convert_to_canonical_forms(hed_schema)
validation_issues += tag._calculate_to_canonical_forms(hed_schema)

return validation_issues

def __deepcopy__(self, memo):
# check if the object has already been copied
if id(self) in memo:
return memo[id(self)]

# create a new instance of HedString class, and direct copy all parameters
new_string = self.__class__.__new__(self.__class__)
new_string.__dict__.update(self.__dict__)

# add the new object to the memo dictionary
memo[id(self)] = new_string

# Deep copy the attributes that need it (most notably, we don't copy schema/schema entry)
new_string._original_children = copy.deepcopy(self._original_children, memo)
new_string._from_strings = copy.deepcopy(self._from_strings, memo)
new_string._children = copy.deepcopy(self._children, memo)

return new_string

def copy(self):
""" Return a deep copy of this string.

Returns:
HedGroup: The copied group.

"""
return_copy = copy.deepcopy(self)
return return_copy

def remove_definitions(self):
""" Remove definition tags and groups from this string.

Expand Down Expand Up @@ -118,63 +158,24 @@ def expand_defs(self):

return self

def convert_to_short(self, hed_schema):
""" Compute canonical forms and return the short form.

Parameters:
hed_schema (HedSchema or None): The schema to use to calculate forms.

Returns:
tuple:
- str: The string with all tags converted to short form.
- list: A list of issues found during conversion. Each issue is a dictionary.

Notes:
- No issues will be found if no schema is passed.

"""
conversion_issues = self.convert_to_canonical_forms(hed_schema)
short_string = self.get_as_short()
return short_string, conversion_issues

def convert_to_long(self, hed_schema):
""" Compute canonical forms and return the long form.

Parameters:
hed_schema (HedSchema or None): The schema to use to calculate forms.

Returns:
tuple:
- str: The string with all tags converted to long form.
- list: A list of issues found during conversion. Each issue is a dictionary.

Notes:
- No issues will be found if no schema is passed.

"""
conversion_issues = self.convert_to_canonical_forms(hed_schema)
short_string = self.get_as_long()
return short_string, conversion_issues

def convert_to_original(self):
def get_as_original(self):
""" Return the original form of this string.

Returns:
str: The string with all the tags in their original form.

Notes:
Potentially with some extraneous spaces removed on returned string.

"""
return self.get_as_form("org_tag")

@staticmethod
def split_into_groups(hed_string, hed_schema=None, def_dict=None):
def split_into_groups(hed_string, hed_schema, def_dict=None):
""" Split the HED string into a parse tree.

Parameters:
hed_string (str): A hed string consisting of tags and tag groups to be processed.
hed_schema (HedSchema or None): HED schema to use to identify tags.
hed_schema (HedSchema): HED schema to use to identify tags.
def_dict(DefinitionDict): The definitions to identify
Returns:
list: A list of HedTag and/or HedGroup.
Expand All @@ -190,7 +191,7 @@ def split_into_groups(hed_string, hed_schema=None, def_dict=None):
input_tags = HedString.split_hed_string(hed_string)
for is_hed_tag, (startpos, endpos) in input_tags:
if is_hed_tag:
new_tag = HedTag(hed_string, (startpos, endpos), hed_schema, def_dict)
new_tag = HedTag(hed_string, hed_schema, (startpos, endpos), def_dict)
current_tag_group[-1].append(new_tag)
else:
string_portion = hed_string[startpos:endpos]
Expand Down Expand Up @@ -239,11 +240,30 @@ def _get_org_span(self, tag_or_group):
- If the hed tag or group was not in the original string, returns (None, None).

"""
if self._from_strings:
return self._get_org_span_from_strings(tag_or_group)

if self.check_if_in_original(tag_or_group):
return tag_or_group.span

return None, None

def _get_org_span_from_strings(self, tag_or_group):
"""Variant of _get_org_span for a HedString that was created from other HedString objects via from_hed_strings."""
found_string = None
string_start_index = 0
for string in self._from_strings:
if string.check_if_in_original(tag_or_group):
found_string = string
break
# Add 1 for the comma that from_hed_strings inserts between the joined strings
string_start_index += string.span[1] + 1

if not found_string:
return None, None

return tag_or_group.span[0] + string_start_index, tag_or_group.span[1] + string_start_index

@staticmethod
def split_hed_string(hed_string):
""" Split a HED string into delimiters and tags.
Expand Down
Loading