Commit

Merge pull request #889 from VisLab/develop
Added description fields to the remodeling JSON and removed get_assembled
VisLab authored Mar 25, 2024
2 parents 71ba0cc + d74696b commit 86172d9
Showing 33 changed files with 398 additions and 355 deletions.
2 changes: 1 addition & 1 deletion hed/models/__init__.py
@@ -13,4 +13,4 @@
from .sidecar import Sidecar
from .tabular_input import TabularInput
from .timeseries_input import TimeseriesInput
from .df_util import get_assembled, convert_to_form, shrink_defs, expand_defs, process_def_expands
from .df_util import convert_to_form, shrink_defs, expand_defs, process_def_expands
23 changes: 0 additions & 23 deletions hed/models/df_util.py
@@ -6,29 +6,6 @@
from hed.models.model_constants import DefTagNames


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.
Parameters:
tabular_file (TabularInput): Represents the tabular input file.
hed_schema (HedSchema): If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
Returns:
tuple:
hed_strings(list of HedStrings): A list of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar.
"""

def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
series_a = tabular_file.series_a
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form (in place).
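
With get_assembled removed from df_util, code that still needs assembled HedString objects can inline the equivalent of the deleted body above. A minimal sketch, reusing only names that appear in the removed function (the HedString import path and the tabular_file/hed_schema variables are assumed to be in scope):

from hed.models import HedString  # import path assumed

# Equivalent of the removed get_assembled(tabular_file, hed_schema, defs_expanded=True)
def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=None)
hed_strings = [HedString(text, hed_schema, def_dict).expand_defs()
               for text in tabular_file.series_a]
# Use .shrink_defs() instead of .expand_defs() for the defs_expanded=False behavior.
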
2 changes: 1 addition & 1 deletion hed/models/query_service.py
@@ -27,7 +27,7 @@ def get_query_handlers(queries, query_names=None):
query_names = [f"query_{index}" for index in range(len(queries))]

if len(queries) != len(query_names):
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" +
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
f"to the queries length {len(queries)}.")
elif len(set(query_names)) != len(query_names):
issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")
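
The one-character change above adds a trailing space so the two concatenated f-strings no longer run together as "...equalto the queries length..."; a small illustration using the same strings:

queries = ["Sensory-event", "Agent-action"]
query_names = ["only_one_name"]
# With the trailing space the message reads "... must be empty or equal to the queries length 2."
message = (f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
           f"to the queries length {len(queries)}.")
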
108 changes: 43 additions & 65 deletions hed/schema/schema_io/wiki2schema.py
@@ -36,7 +36,7 @@


class SchemaLoaderWiki(SchemaLoader):
""" Loads MediaWiki schemas from filenames or strings.
""" Load MediaWiki schemas from filenames or strings.
Expected usage is SchemaLoaderWiki.load(filename)
@@ -104,12 +104,11 @@ def _parse_sections(self, wiki_lines_by_section, parse_order):
parse_func(lines_for_section)

def _read_header_section(self, lines):
"""Ensures the header has no content other than the initial line.
"""Ensure the header has no content other than the initial line.
Parameters:
lines (int, str): Lines for the header section.
Parameters
----------
lines: [(int, str)]
Lines for this section
"""
for line_number, line in lines:
if line.strip():
@@ -120,40 +119,34 @@ def _read_text_block(self, lines):
text = ""
for line_number, line in lines:
text += line
# We expect one blank line(plus the normal line break). Any more should be preserved
# We expect one blank line(plus the normal line break). Any additional lines should be preserved.
if text.endswith("\n\n"):
text = text[:-2]
elif text.endswith("\n"):
text = text[:-1]
return text

def _read_prologue(self, lines):
"""Adds the prologue
"""Add the prologue.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for prologue section.
"""
self._schema.prologue = self._read_text_block(lines)

def _read_epilogue(self, lines):
"""Adds the epilogue
"""Adds the epilogue.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for the epilogue section.
"""
self._schema.epilogue = self._read_text_block(lines)

def _read_schema(self, lines):
"""Adds the main schema section
"""Add the main schema section
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for main schema section.
"""
self._schema._initialize_attributes(HedSectionKey.Tags)
parent_tags = []
@@ -168,8 +161,8 @@ def _read_schema(self, lines):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(line_number, line,
"Line has too many *'s at the front. You cannot skip a level."
, HedExceptions.WIKI_LINE_START_INVALID)
"Line has too many *'s at front. You cannot skip a level.",
HedExceptions.WIKI_LINE_START_INVALID)
continue
# Create the entry
tag_entry = self._add_tag_line(parent_tags, line_number, line)
@@ -194,12 +187,10 @@ def _read_schema(self, lines):
parent_tags.append(tag_entry.short_tag_name)

def _read_unit_classes(self, lines):
"""Adds the unit classes section
"""Add the unit classes section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit class section.
"""
self._schema._initialize_attributes(HedSectionKey.UnitClasses)
self._schema._initialize_attributes(HedSectionKey.Units)
@@ -227,22 +218,18 @@ def _read_section(self, lines, section_key):
self._add_to_dict(line_number, line, new_entry, section_key)

def _read_unit_modifiers(self, lines):
"""Adds the unit modifiers section
"""Add the unit modifiers section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit modifiers section.
"""
self._read_section(lines, HedSectionKey.UnitModifiers)

def _read_value_classes(self, lines):
"""Adds the unit modifiers section
"""Add the value classes section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the value class section.
"""
self._read_section(lines, HedSectionKey.ValueClasses)

@@ -255,14 +242,11 @@ def _read_attributes(self, lines):
def _get_header_attributes_internal(self, version_line):
"""Extracts all valid attributes like version from the HED line in .mediawiki format.
Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.
Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
"""
if "=" not in version_line:
return self._get_header_attributes_internal_old(version_line)
@@ -285,7 +269,7 @@ def _parse_attributes_line(version_line):
for match in attr_re.finditer(version_line):
start, end = match.span()

# If there's unmatched content between the last match and the current one
# If there's unmatched content between the last match and the current one.
if start > last_end:
unmatched.append(version_line[last_end:start])

@@ -300,16 +284,13 @@ def _get_header_attributes_internal_old(self, version_line):
return matches, unmatched

def _get_header_attributes_internal_old(self, version_line):
""" Extracts all valid attributes like version from the HED line in .mediawiki format.
""" Extract all valid attributes like version from the HED line in .mediawiki format.
Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.
Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}.
"""
final_attributes = {}
attribute_pairs = version_line.split(',')
@@ -347,17 +328,14 @@ def _get_tag_level(tag_line):
return count

def _remove_nowiki_tag_from_line(self, line_number, tag_line):
"""Removes the nowiki tag from the line.
"""Remove the nowiki tag from the line.
Parameters
----------
line_number (int): The line number to report errors as
tag_line (string): A tag line.
Parameters:
line_number (int): The line number to report errors as
tag_line (str): A tag line.
Returns
-------
string
The line with the nowiki tag removed.
Returns:
str: The line with the nowiki tag removed.
"""
index1 = tag_line.find(no_wiki_start_tag)
index2 = tag_line.find(no_wiki_end_tag)
@@ -401,7 +379,7 @@ def _get_tag_attributes(self, line_number, tag_line, starting_index):
""" Get the tag attributes from a line.
Parameters:
line_number (int): The line number to report errors as
line_number (int): The line number to report errors as.
tag_line (str): A tag line.
starting_index (int): The first index we can check for the brackets.
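
For context, the level check in _read_schema above is driven by the number of leading *'s on a .mediawiki tag line; a small hypothetical illustration of the rule it enforces (the tag names and top-level quoting are illustrative, not taken from this diff):

# Each extra leading '*' nests a tag one level deeper than the preceding line.
ok_lines = [
    "'''Event'''",             # top-level tag, no asterisks
    "* Sensory-event",         # level 1 child
    "** Visual-presentation",  # level 2 child
]
# Skipping a level is rejected with WIKI_LINE_START_INVALID
# ("Line has too many *'s at front. You cannot skip a level.").
bad_lines = [
    "'''Event'''",
    "** Visual-presentation",  # jumps from level 0 to level 2 -> fatal error
]
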
11 changes: 8 additions & 3 deletions hed/tools/remodeling/operations/convert_columns_op.py
@@ -1,5 +1,5 @@
""" Convert the type of the specified columns of a tabular file. """
#TODO finish implementation
# TODO finish implementation

from hed.tools.remodeling.operations.base_op import BaseOp

@@ -12,7 +12,9 @@ class ConvertColumnsOp(BaseOp):
- **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
Optional remodeling parameters:
- **decimal_places** (*int*): Number of decimal places to keep (for fixed only).
Notes:
"""
NAME = "convert_columns"
@@ -22,6 +24,7 @@ class ConvertColumnsOp(BaseOp):
"properties": {
"column_names": {
"type": "array",
"description": "List of names of the columns whose types are to be converted to the specified type.",
"items": {
"type": "string"
},
@@ -30,10 +33,12 @@
},
"convert_to": {
"type": "string",
"description": "Data type to convert the columns to.",
"enum": ['str', 'int', 'float', 'fixed'],
},
"decimal_places": {
"type": "integer"
"type": "integer",
"description": "The number of decimal points if converted to fixed."
}
},
"required": [
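
The new description fields document each parameter for tooling that reads the JSON schema. A hypothetical remodeling-file entry that satisfies the PARAMS schema above (the operation/description/parameters wrapper is assumed from the remodeling file format and is not shown in this diff):

convert_columns_entry = {
    "operation": "convert_columns",
    "description": "Convert timing columns to fixed-point values.",
    "parameters": {
        "column_names": ["response_time", "stim_delay"],  # list of columns to convert
        "convert_to": "fixed",                            # one of 'str', 'int', 'float', 'fixed'
        "decimal_places": 3                               # only used when convert_to is 'fixed'
    }
}
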
13 changes: 10 additions & 3 deletions hed/tools/remodeling/operations/factor_column_op.py
@@ -11,7 +11,11 @@ class FactorColumnOp(BaseOp):
Optional remodeling parameters
- **factor_names** (*list*): Names to use as the factor columns.
- **factor_values** (*list*): Values in the column_name column to create factors for.
Notes:
- If no factor_values are provided, factors are computed for each of the unique values in column_name column.
- If factor_names are provided, then factor_values must also be provided and the two lists must be the same size.
"""
NAME = "factor_column"
@@ -20,10 +24,12 @@
"type": "object",
"properties": {
"column_name": {
"type": "string"
"type": "string",
"description": "Name of the column for which to create one-hot factors for unique values."
},
"factor_names": {
"type": "array",
"description": "Names of the resulting factor columns. If given must be same length as factor_values",
"items": {
"type": "string"
},
@@ -32,8 +38,9 @@
},
"factor_values": {
"type": "array",
"description": "Specific unique column values to compute factors for (otherwise all unique values).",
"items": {
"type": "string"
"type": "string"
},
"minItems": 1,
"uniqueItems": True
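
Similarly, a hypothetical factor_column entry matching the schema above (the column name and factor values are invented for illustration):

factor_column_entry = {
    "operation": "factor_column",
    "description": "Create one-hot factors for response accuracy.",
    "parameters": {
        "column_name": "response_accuracy",
        "factor_values": ["correct", "incorrect"],
        "factor_names": ["factor_correct", "factor_incorrect"]  # must match factor_values in length
    }
}
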
12 changes: 9 additions & 3 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
@@ -24,7 +24,8 @@ class FactorHedTagsOp(BaseOp):
- **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration.
Notes:
- If query names are not provided, *query1*, *query2*, ... are used.
- If query names are provided, the list must have the same length as the number of queries.
- When the context is expanded, the effect of events for temporal extent is accounted for.
"""
@@ -35,6 +36,7 @@
"properties": {
"queries": {
"type": "array",
"description": "List of HED tag queries to compute one-hot factors for.",
"items": {
"type": "string"
},
@@ -43,6 +45,7 @@
},
"query_names": {
"type": "array",
"description": "Optional column names for the queries.",
"items": {
"type": "string"
},
@@ -51,17 +54,20 @@
},
"remove_types": {
"type": "array",
"descriptions": "List of type tags to remove from before querying (e.g., Condition-variable, Task).",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": True
},
"expand_context": {
"type": "boolean"
"type": "boolean",
"description": "If true, the assembled HED tags include the effects of temporal extent (e.g., Onset)."
},
"replace_defs": {
"type": "boolean"
"type": "boolean",
"description": "If true, Def tags are replaced with definition contents."
}
},
"required": [
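
And a hypothetical factor_hed_tags entry exercising the parameters documented above (the queries and type tags are illustrative):

factor_hed_tags_entry = {
    "operation": "factor_hed_tags",
    "description": "Factor sensory events versus agent actions.",
    "parameters": {
        "queries": ["Sensory-event", "Agent-action"],
        "query_names": ["sensory", "action"],            # optional; names are generated if omitted
        "remove_types": ["Condition-variable", "Task"],
        "expand_context": False,
        "replace_defs": True
    }
}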