Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added description fields to the remodeling JSON and removed get_assembled #889

Merged
merged 2 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion hed/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
from .sidecar import Sidecar
from .tabular_input import TabularInput
from .timeseries_input import TimeseriesInput
from .df_util import get_assembled, convert_to_form, shrink_defs, expand_defs, process_def_expands
from .df_util import convert_to_form, shrink_defs, expand_defs, process_def_expands
23 changes: 0 additions & 23 deletions hed/models/df_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,6 @@
from hed.models.model_constants import DefTagNames


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
    """ Assemble the HED annotations of a tabular input into a list of HedString objects.

    Parameters:
        tabular_file (TabularInput): Represents the tabular input file.
        hed_schema (HedSchema): If str, will attempt to load as a version if it doesn't have a valid extension.
        extra_def_dicts: list of DefinitionDict, optional
            Any extra DefinitionDict objects to use when parsing the HED tags.
        defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
    Returns:
        tuple:
            hed_strings(list of HedStrings): A list of HedStrings
            def_dict(DefinitionDict): The definitions from this Sidecar.
    """
    def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
    # Pick the definition transform once, then apply it row by row.
    transform_name = "expand_defs" if defs_expanded else "shrink_defs"
    assembled = [getattr(HedString(row_text, hed_schema, def_dict), transform_name)()
                 for row_text in tabular_file.series_a]
    return assembled, def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form (in place).

Expand Down
2 changes: 1 addition & 1 deletion hed/models/query_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def get_query_handlers(queries, query_names=None):
query_names = [f"query_{index}" for index in range(len(queries))]

if len(queries) != len(query_names):
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" +
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
f"to the queries length {len(queries)}.")
elif len(set(query_names)) != len(query_names):
issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")
Expand Down
108 changes: 43 additions & 65 deletions hed/schema/schema_io/wiki2schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@


class SchemaLoaderWiki(SchemaLoader):
""" Loads MediaWiki schemas from filenames or strings.
""" Load MediaWiki schemas from filenames or strings.

Expected usage is SchemaLoaderWiki.load(filename)

Expand Down Expand Up @@ -104,12 +104,11 @@ def _parse_sections(self, wiki_lines_by_section, parse_order):
parse_func(lines_for_section)

def _read_header_section(self, lines):
"""Ensures the header has no content other than the initial line.
"""Ensure the header has no content other than the initial line.

Parameters:
lines (int, str): Lines for the header section.

Parameters
----------
lines: [(int, str)]
Lines for this section
"""
for line_number, line in lines:
if line.strip():
Expand All @@ -120,40 +119,34 @@ def _read_text_block(self, lines):
text = ""
for line_number, line in lines:
text += line
# We expect one blank line(plus the normal line break). Any more should be preserved
# We expect one blank line(plus the normal line break). Any additional lines should be preserved.
if text.endswith("\n\n"):
text = text[:-2]
elif text.endswith("\n"):
text = text[:-1]
return text

def _read_prologue(self, lines):
"""Adds the prologue
"""Add the prologue.

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for prologue section.
"""
self._schema.prologue = self._read_text_block(lines)

def _read_epilogue(self, lines):
"""Adds the epilogue
"""Adds the epilogue.

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for the epilogue section.
"""
self._schema.epilogue = self._read_text_block(lines)

def _read_schema(self, lines):
"""Adds the main schema section
"""Add the main schema section

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for main schema section.
"""
self._schema._initialize_attributes(HedSectionKey.Tags)
parent_tags = []
Expand All @@ -168,8 +161,8 @@ def _read_schema(self, lines):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(line_number, line,
"Line has too many *'s at the front. You cannot skip a level."
, HedExceptions.WIKI_LINE_START_INVALID)
"Line has too many *'s at front. You cannot skip a level.",
HedExceptions.WIKI_LINE_START_INVALID)
continue
# Create the entry
tag_entry = self._add_tag_line(parent_tags, line_number, line)
Expand All @@ -194,12 +187,10 @@ def _read_schema(self, lines):
parent_tags.append(tag_entry.short_tag_name)

def _read_unit_classes(self, lines):
"""Adds the unit classes section
"""Add the unit classes section.

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit class section.
"""
self._schema._initialize_attributes(HedSectionKey.UnitClasses)
self._schema._initialize_attributes(HedSectionKey.Units)
Expand Down Expand Up @@ -227,22 +218,18 @@ def _read_section(self, lines, section_key):
self._add_to_dict(line_number, line, new_entry, section_key)

def _read_unit_modifiers(self, lines):
"""Adds the unit modifiers section
"""Add the unit modifiers section.

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit modifiers section.
"""
self._read_section(lines, HedSectionKey.UnitModifiers)

def _read_value_classes(self, lines):
"""Adds the unit modifiers section
"""Add the value classes section.

Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the value class section.
"""
self._read_section(lines, HedSectionKey.ValueClasses)

Expand All @@ -255,14 +242,11 @@ def _read_attributes(self, lines):
def _get_header_attributes_internal(self, version_line):
"""Extracts all valid attributes like version from the HED line in .mediawiki format.

Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.

Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
"""
if "=" not in version_line:
return self._get_header_attributes_internal_old(version_line)
Expand All @@ -285,7 +269,7 @@ def _parse_attributes_line(version_line):
for match in attr_re.finditer(version_line):
start, end = match.span()

# If there's unmatched content between the last match and the current one
# If there's unmatched content between the last match and the current one.
if start > last_end:
unmatched.append(version_line[last_end:start])

Expand All @@ -300,16 +284,13 @@ def _parse_attributes_line(version_line):
return matches, unmatched

def _get_header_attributes_internal_old(self, version_line):
""" Extracts all valid attributes like version from the HED line in .mediawiki format.
""" Extract all valid attributes like version from the HED line in .mediawiki format.

Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.

Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}.
"""
final_attributes = {}
attribute_pairs = version_line.split(',')
Expand Down Expand Up @@ -347,17 +328,14 @@ def _get_tag_level(tag_line):
return count

def _remove_nowiki_tag_from_line(self, line_number, tag_line):
"""Removes the nowiki tag from the line.
"""Remove the nowiki tag from the line.

Parameters
----------
line_number (int): The line number to report errors as
tag_line (string): A tag line.
Parameters:
line_number (int): The line number to report errors as
tag_line (str): A tag line.

Returns
-------
string
The line with the nowiki tag removed.
Returns:
str: The line with the nowiki tag removed.
"""
index1 = tag_line.find(no_wiki_start_tag)
index2 = tag_line.find(no_wiki_end_tag)
Expand Down Expand Up @@ -401,7 +379,7 @@ def _get_tag_attributes(self, line_number, tag_line, starting_index):
""" Get the tag attributes from a line.

Parameters:
line_number (int): The line number to report errors as
line_number (int): The line number to report errors as.
tag_line (str): A tag line.
starting_index (int): The first index we can check for the brackets.

Expand Down
11 changes: 8 additions & 3 deletions hed/tools/remodeling/operations/convert_columns_op.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
""" Convert the type of the specified columns of a tabular file. """
#TODO finish implementation
# TODO finish implementation

from hed.tools.remodeling.operations.base_op import BaseOp

Expand All @@ -12,7 +12,9 @@ class ConvertColumnsOp(BaseOp):
- **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)

Optional remodeling parameters:
- **decimal_places** (*int*): Number decimal places to keep (for fixed only).
- **decimal_places** (*int*): Number decimal places to keep (for fixed only).

Notes:

"""
NAME = "convert_columns"
Expand All @@ -22,6 +24,7 @@ class ConvertColumnsOp(BaseOp):
"properties": {
"column_names": {
"type": "array",
"description": "List of names of the columns whose types are to be converted to the specified type.",
"items": {
"type": "string"
},
Expand All @@ -30,10 +33,12 @@ class ConvertColumnsOp(BaseOp):
},
"convert_to": {
"type": "string",
"description": "Data type to convert the columns to.",
"enum": ['str', 'int', 'float', 'fixed'],
},
"decimal_places": {
"type": "integer"
"type": "integer",
"description": "The number of decimal points if converted to fixed."
}
},
"required": [
Expand Down
13 changes: 10 additions & 3 deletions hed/tools/remodeling/operations/factor_column_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@ class FactorColumnOp(BaseOp):

Optional remodeling parameters
- **factor_names** (*list*): Names to use as the factor columns.
- **factor_values** (*list*): Values in the column column_name to create factors for.
- **factor_values** (*list*): Values in the column column_name to create factors for.

Notes:
- If no factor_values are provided, factors are computed for each unique value in the column_name column.
- If factor_names are provided, then factor_values must also be provided and the two lists must be the same size.

"""
NAME = "factor_column"
Expand All @@ -20,10 +24,12 @@ class FactorColumnOp(BaseOp):
"type": "object",
"properties": {
"column_name": {
"type": "string"
"type": "string",
"description": "Name of the column for which to create one-hot factors for unique values."
},
"factor_names": {
"type": "array",
"description": "Names of the resulting factor columns. If given must be same length as factor_values",
"items": {
"type": "string"
},
Expand All @@ -32,8 +38,9 @@ class FactorColumnOp(BaseOp):
},
"factor_values": {
"type": "array",
"description": "Specific unique column values to compute factors for (otherwise all unique values).",
"items": {
"type": "string"
"type": "string"
},
"minItems": 1,
"uniqueItems": True
Expand Down
12 changes: 9 additions & 3 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ class FactorHedTagsOp(BaseOp):
- **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration.

Notes:
- If query names are not provided, *query1*, *query2*, ... are used.
- If query names are not provided, *query1*, *query2*, ... are used.
- If query names are provided, the list must have the same length as the number of queries.
- When the context is expanded, the effect of events for temporal extent is accounted for.

"""
Expand All @@ -35,6 +36,7 @@ class FactorHedTagsOp(BaseOp):
"properties": {
"queries": {
"type": "array",
"description": "List of HED tag queries to compute one-hot factors for.",
"items": {
"type": "string"
},
Expand All @@ -43,6 +45,7 @@ class FactorHedTagsOp(BaseOp):
},
"query_names": {
"type": "array",
"description": "Optional column names for the queries.",
"items": {
"type": "string"
},
Expand All @@ -51,17 +54,20 @@ class FactorHedTagsOp(BaseOp):
},
"remove_types": {
"type": "array",
"descriptions": "List of type tags to remove from before querying (e.g., Condition-variable, Task).",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": True
},
"expand_context": {
"type": "boolean"
"type": "boolean",
"description": "If true, the assembled HED tags include the effects of temporal extent (e.g., Onset)."
},
"replace_defs": {
"type": "boolean"
"type": "boolean",
"description": "If true, Def tags are replaced with definition contents."
}
},
"required": [
Expand Down
Loading