Commit

Merge pull request #889 from VisLab/develop
Added description fields to the remodeling JSON and removed get_assembled
VisLab authored Mar 25, 2024
2 parents 71ba0cc + d74696b commit 86172d9
Showing 33 changed files with 398 additions and 355 deletions.
2 changes: 1 addition & 1 deletion hed/models/__init__.py
@@ -13,4 +13,4 @@
from .sidecar import Sidecar
from .tabular_input import TabularInput
from .timeseries_input import TimeseriesInput
from .df_util import get_assembled, convert_to_form, shrink_defs, expand_defs, process_def_expands
from .df_util import convert_to_form, shrink_defs, expand_defs, process_def_expands
23 changes: 0 additions & 23 deletions hed/models/df_util.py
@@ -6,29 +6,6 @@
from hed.models.model_constants import DefTagNames


def get_assembled(tabular_file, hed_schema, extra_def_dicts=None, defs_expanded=True):
""" Create an array of assembled HedString objects (or list of these) of the same length as tabular file input.
Parameters:
tabular_file (TabularInput): Represents the tabular input file.
hed_schema (HedSchema): If str, will attempt to load as a version if it doesn't have a valid extension.
extra_def_dicts: list of DefinitionDict, optional
Any extra DefinitionDict objects to use when parsing the HED tags.
defs_expanded (bool): (Default True) Expands definitions if True, otherwise shrinks them.
Returns:
tuple:
hed_strings(list of HedStrings): A list of HedStrings
def_dict(DefinitionDict): The definitions from this Sidecar.
"""

def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=extra_def_dicts)
series_a = tabular_file.series_a
if defs_expanded:
return [HedString(x, hed_schema, def_dict).expand_defs() for x in series_a], def_dict
else:
return [HedString(x, hed_schema, def_dict).shrink_defs() for x in series_a], def_dict


def convert_to_form(df, hed_schema, tag_form, columns=None):
""" Convert all tags in underlying dataframe to the specified form (in place).
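
With get_assembled removed from df_util, code that still needs assembled HedString objects can inline the equivalent of the deleted body above. A minimal sketch, reusing only names that appear in the removed function (the HedString import path and the tabular_file/hed_schema variables are assumed to be in scope):

from hed.models import HedString  # import path assumed

# Equivalent of the removed get_assembled(tabular_file, hed_schema, defs_expanded=True)
def_dict = tabular_file.get_def_dict(hed_schema, extra_def_dicts=None)
hed_strings = [HedString(text, hed_schema, def_dict).expand_defs()
               for text in tabular_file.series_a]
# Use .shrink_defs() instead of .expand_defs() for the defs_expanded=False behavior.
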
2 changes: 1 addition & 1 deletion hed/models/query_service.py
@@ -27,7 +27,7 @@ def get_query_handlers(queries, query_names=None):
query_names = [f"query_{index}" for index in range(len(queries))]

if len(queries) != len(query_names):
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal" +
issues.append(f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
f"to the queries length {len(queries)}.")
elif len(set(query_names)) != len(query_names):
issues.append(f"DuplicateQueryNames: The query names {str(query_names)} list has duplicates")
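
The one-character change above adds a trailing space so the two concatenated f-strings no longer run together as "...equalto the queries length..."; a small illustration using the same strings:

queries = ["Sensory-event", "Agent-action"]
query_names = ["only_one_name"]
# With the trailing space the message reads "... must be empty or equal to the queries length 2."
message = (f"QueryNamesLengthBad: The query_names length {len(query_names)} must be empty or equal " +
           f"to the queries length {len(queries)}.")
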
108 changes: 43 additions & 65 deletions hed/schema/schema_io/wiki2schema.py
@@ -36,7 +36,7 @@


class SchemaLoaderWiki(SchemaLoader):
""" Loads MediaWiki schemas from filenames or strings.
""" Load MediaWiki schemas from filenames or strings.
Expected usage is SchemaLoaderWiki.load(filename)
@@ -104,12 +104,11 @@ def _parse_sections(self, wiki_lines_by_section, parse_order):
parse_func(lines_for_section)

def _read_header_section(self, lines):
"""Ensures the header has no content other than the initial line.
"""Ensure the header has no content other than the initial line.
Parameters:
lines (int, str): Lines for the header section.
Parameters
----------
lines: [(int, str)]
Lines for this section
"""
for line_number, line in lines:
if line.strip():
@@ -120,40 +119,34 @@ def _read_text_block(self, lines):
text = ""
for line_number, line in lines:
text += line
# We expect one blank line(plus the normal line break). Any more should be preserved
# We expect one blank line(plus the normal line break). Any additional lines should be preserved.
if text.endswith("\n\n"):
text = text[:-2]
elif text.endswith("\n"):
text = text[:-1]
return text

def _read_prologue(self, lines):
"""Adds the prologue
"""Add the prologue.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for prologue section.
"""
self._schema.prologue = self._read_text_block(lines)

def _read_epilogue(self, lines):
"""Adds the epilogue
"""Adds the epilogue.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines: (int, str): Lines for the epilogue section.
"""
self._schema.epilogue = self._read_text_block(lines)

def _read_schema(self, lines):
"""Adds the main schema section
"""Add the main schema section
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for main schema section.
"""
self._schema._initialize_attributes(HedSectionKey.Tags)
parent_tags = []
@@ -168,8 +161,8 @@ def _read_schema(self, lines):
parent_tags = parent_tags[:level]
elif level > len(parent_tags):
self._add_fatal_error(line_number, line,
"Line has too many *'s at the front. You cannot skip a level."
, HedExceptions.WIKI_LINE_START_INVALID)
"Line has too many *'s at front. You cannot skip a level.",
HedExceptions.WIKI_LINE_START_INVALID)
continue
# Create the entry
tag_entry = self._add_tag_line(parent_tags, line_number, line)
@@ -194,12 +187,10 @@ def _read_schema(self, lines):
parent_tags.append(tag_entry.short_tag_name)

def _read_unit_classes(self, lines):
"""Adds the unit classes section
"""Add the unit classes section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit class section.
"""
self._schema._initialize_attributes(HedSectionKey.UnitClasses)
self._schema._initialize_attributes(HedSectionKey.Units)
@@ -227,22 +218,18 @@ def _read_section(self, lines, section_key):
self._add_to_dict(line_number, line, new_entry, section_key)

def _read_unit_modifiers(self, lines):
"""Adds the unit modifiers section
"""Add the unit modifiers section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the unit modifiers section.
"""
self._read_section(lines, HedSectionKey.UnitModifiers)

def _read_value_classes(self, lines):
"""Adds the unit modifiers section
"""Add the value classes section.
Parameters
----------
lines: [(int, str)]
Lines for this section
Parameters:
lines (int, str): Lines for the value class section.
"""
self._read_section(lines, HedSectionKey.ValueClasses)

@@ -255,14 +242,11 @@ def _read_attributes(self, lines):
def _get_header_attributes_internal(self, version_line):
"""Extracts all valid attributes like version from the HED line in .mediawiki format.
Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.
Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
"""
if "=" not in version_line:
return self._get_header_attributes_internal_old(version_line)
@@ -285,7 +269,7 @@ def _parse_attributes_line(version_line):
for match in attr_re.finditer(version_line):
start, end = match.span()

# If there's unmatched content between the last match and the current one
# If there's unmatched content between the last match and the current one.
if start > last_end:
unmatched.append(version_line[last_end:start])

@@ -300,16 +284,13 @@ def _get_header_attributes_internal_old(self, version_line):
return matches, unmatched

def _get_header_attributes_internal_old(self, version_line):
""" Extracts all valid attributes like version from the HED line in .mediawiki format.
""" Extract all valid attributes like version from the HED line in .mediawiki format.
Parameters
----------
version_line: string
The line in the wiki file that contains the version or other attributes.
Parameters:
version_line (str): The line in the wiki file that contains the version or other attributes.
Returns
-------
{}: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}
Returns:
dict: The key is the name of the attribute, value being the value. eg {'version':'v1.0.1'}.
"""
final_attributes = {}
attribute_pairs = version_line.split(',')
@@ -347,17 +328,14 @@ def _get_tag_level(tag_line):
return count

def _remove_nowiki_tag_from_line(self, line_number, tag_line):
"""Removes the nowiki tag from the line.
"""Remove the nowiki tag from the line.
Parameters
----------
line_number (int): The line number to report errors as
tag_line (string): A tag line.
Parameters:
line_number (int): The line number to report errors as
tag_line (str): A tag line.
Returns
-------
string
The line with the nowiki tag removed.
Returns:
str: The line with the nowiki tag removed.
"""
index1 = tag_line.find(no_wiki_start_tag)
index2 = tag_line.find(no_wiki_end_tag)
@@ -401,7 +379,7 @@ def _get_tag_attributes(self, line_number, tag_line, starting_index):
""" Get the tag attributes from a line.
Parameters:
line_number (int): The line number to report errors as
line_number (int): The line number to report errors as.
tag_line (str): A tag line.
starting_index (int): The first index we can check for the brackets.
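
For context, the level check in _read_schema above is driven by the number of leading *'s on a .mediawiki tag line; a small hypothetical illustration of the rule it enforces (the tag names and top-level quoting are illustrative, not taken from this diff):

# Each extra leading '*' nests a tag one level deeper than the preceding line.
ok_lines = [
    "'''Event'''",             # top-level tag, no asterisks
    "* Sensory-event",         # level 1 child
    "** Visual-presentation",  # level 2 child
]
# Skipping a level is rejected with WIKI_LINE_START_INVALID
# ("Line has too many *'s at front. You cannot skip a level.").
bad_lines = [
    "'''Event'''",
    "** Visual-presentation",  # jumps from level 0 to level 2 -> fatal error
]
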
11 changes: 8 additions & 3 deletions hed/tools/remodeling/operations/convert_columns_op.py
@@ -1,5 +1,5 @@
""" Convert the type of the specified columns of a tabular file. """
#TODO finish implementation
# TODO finish implementation

from hed.tools.remodeling.operations.base_op import BaseOp

@@ -12,7 +12,9 @@ class ConvertColumnsOp(BaseOp):
- **convert_to** (*str*): Name of type to convert to. (One of 'str', 'int', 'float', 'fixed'.)
Optional remodeling parameters:
- **decimal_places** (*int*): Number of decimal places to keep (for fixed only).
Notes:
"""
NAME = "convert_columns"
@@ -22,6 +24,7 @@ class ConvertColumnsOp(BaseOp):
"properties": {
"column_names": {
"type": "array",
"description": "List of names of the columns whose types are to be converted to the specified type.",
"items": {
"type": "string"
},
@@ -30,10 +33,12 @@
},
"convert_to": {
"type": "string",
"description": "Data type to convert the columns to.",
"enum": ['str', 'int', 'float', 'fixed'],
},
"decimal_places": {
"type": "integer"
"type": "integer",
"description": "The number of decimal points if converted to fixed."
}
},
"required": [
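
The new description fields document each parameter for tooling that reads the JSON schema. A hypothetical remodeling-file entry that satisfies the PARAMS schema above (the operation/description/parameters wrapper is assumed from the remodeling file format and is not shown in this diff):

convert_columns_entry = {
    "operation": "convert_columns",
    "description": "Convert timing columns to fixed-point values.",
    "parameters": {
        "column_names": ["response_time", "stim_delay"],  # list of columns to convert
        "convert_to": "fixed",                            # one of 'str', 'int', 'float', 'fixed'
        "decimal_places": 3                               # only used when convert_to is 'fixed'
    }
}
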
13 changes: 10 additions & 3 deletions hed/tools/remodeling/operations/factor_column_op.py
@@ -11,7 +11,11 @@ class FactorColumnOp(BaseOp):
Optional remodeling parameters
- **factor_names** (*list*): Names to use as the factor columns.
- **factor_values** (*list*): Values in the column_name column to create factors for.
Notes:
- If no factor_values are provided, factors are computed for each of the unique values in column_name column.
- If factor_names are provided, then factor_values must also be provided and the two lists must be the same size.
"""
NAME = "factor_column"
@@ -20,10 +24,12 @@
"type": "object",
"properties": {
"column_name": {
"type": "string"
"type": "string",
"description": "Name of the column for which to create one-hot factors for unique values."
},
"factor_names": {
"type": "array",
"description": "Names of the resulting factor columns. If given must be same length as factor_values",
"items": {
"type": "string"
},
@@ -32,8 +38,9 @@
},
"factor_values": {
"type": "array",
"description": "Specific unique column values to compute factors for (otherwise all unique values).",
"items": {
"type": "string"
"type": "string"
},
"minItems": 1,
"uniqueItems": True
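
Similarly, a hypothetical factor_column entry matching the schema above (the column name and factor values are invented for illustration):

factor_column_entry = {
    "operation": "factor_column",
    "description": "Create one-hot factors for response accuracy.",
    "parameters": {
        "column_name": "response_accuracy",
        "factor_values": ["correct", "incorrect"],
        "factor_names": ["factor_correct", "factor_incorrect"]  # must match factor_values in length
    }
}
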
12 changes: 9 additions & 3 deletions hed/tools/remodeling/operations/factor_hed_tags_op.py
@@ -24,7 +24,8 @@ class FactorHedTagsOp(BaseOp):
- **expand_context** (*bool*): If true, expand the context based on Onset, Offset, and Duration.
Notes:
- If query names are not provided, *query1*, *query2*, ... are used.
- If query names are provided, the list must have the same length as the number of queries.
- When the context is expanded, the effect of events for temporal extent is accounted for.
"""
@@ -35,6 +36,7 @@
"properties": {
"queries": {
"type": "array",
"description": "List of HED tag queries to compute one-hot factors for.",
"items": {
"type": "string"
},
@@ -43,6 +45,7 @@
},
"query_names": {
"type": "array",
"description": "Optional column names for the queries.",
"items": {
"type": "string"
},
@@ -51,17 +54,20 @@
},
"remove_types": {
"type": "array",
"descriptions": "List of type tags to remove from before querying (e.g., Condition-variable, Task).",
"items": {
"type": "string"
},
"minItems": 1,
"uniqueItems": True
},
"expand_context": {
"type": "boolean"
"type": "boolean",
"description": "If true, the assembled HED tags include the effects of temporal extent (e.g., Onset)."
},
"replace_defs": {
"type": "boolean"
"type": "boolean",
"description": "If true, Def tags are replaced with definition contents."
}
},
"required": [
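
And a hypothetical factor_hed_tags entry exercising the parameters documented above (the queries and type tags are illustrative):

factor_hed_tags_entry = {
    "operation": "factor_hed_tags",
    "description": "Factor sensory events versus agent actions.",
    "parameters": {
        "queries": ["Sensory-event", "Agent-action"],
        "query_names": ["sensory", "action"],            # optional; names are generated if omitted
        "remove_types": ["Condition-variable", "Task"],
        "expand_context": False,
        "replace_defs": True
    }
}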