hed-standard · VisLab · Mar 31, 2023 · Mar 31, 2023 · Mar 31, 2023 · Mar 31, 2023
diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py
@@ -0,0 +1,116 @@
+""" Summarize the definitions used in a tabular file. """
+
+from hed import DefinitionDict, TabularInput, Sidecar
+from hed.models.df_util import process_def_expands
+from hed.tools.analysis.analysis_util import assemble_hed
+from hed.tools.remodeling.operations.base_op import BaseOp
+from hed.tools.remodeling.operations.base_context import BaseContext
+
+
+class SummarizeDefinitionsOp(BaseOp):
+    """ Summarize the definitions used in a tabular file.
+
+    Required remodeling parameters:
+        - **summary_name** (*str*): The name of the summary.   
+        - **summary_filename** (*str*): Base filename of the summary.   
+
+    The purpose is to produce a summary of the definitions found in a tabular file.
+
+    """
+
+    PARAMS = {
+        "operation": "summarize_definitions",
+        "required_parameters": {
+            "summary_name": str,
+            "summary_filename": str
+        },
+        "optional_parameters": {
+        }
+    }
+
+    SUMMARY_TYPE = 'definitions'
+
+    def __init__(self, parameters):
+        """ Constructor for the summarize definitions operation.
+
+        Parameters:
+            parameters (dict): Dictionary with the parameter values for required and optional parameters.
+
+        Raises:
+
+            KeyError   
+                - If a required parameter is missing.   
+                - If an unexpected parameter is provided.   
+
+            TypeError   
+                - If a parameter has the wrong type.    
+
+        """
+
+        super().__init__(self.PARAMS, parameters)
+        self.summary_name = parameters['summary_name']
+        self.summary_filename = parameters['summary_filename']
+
+    def do_op(self, dispatcher, df, name, sidecar=None):
+        """ Add the contents of a dataframe to the definition summary held by the dispatcher.
+
+        Parameters:
+            dispatcher (Dispatcher): Manages the operation I/O.
+            df (DataFrame): The DataFrame to be summarized.
+            name (str):  Unique identifier for the dataframe -- often the original file path.
+            sidecar (Sidecar or file-like): Forwarded to the summary context together with the data.
+
+        Returns:
+            DataFrame: The input DataFrame, returned unchanged.
+
+        Side-effect:
+            Creates the summary context on first use and updates it.
+
+        """
+
+        summary = dispatcher.context_dict.get(self.summary_name, None)
+        if summary is None:  # Identity test: an existing context is reused even if it evaluates as falsy.
+            summary = DefinitionSummaryContext(self)
+            dispatcher.context_dict[self.summary_name] = summary
+        summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, 'sidecar': sidecar,
+                                'schema': dispatcher.hed_schema})
+        return df
+
+
+class DefinitionSummaryContext(BaseContext):
+    """ Track definitions, unresolved definitions and errors taken from the assembled HED strings of files. """
+
+    def __init__(self, sum_op):
+        super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename)
+        self.defs = DefinitionDict()  # Definitions gathered so far; passed back in as known_defs.
+        self.unresolved = {}  # Ambiguous definitions carried between files; passed back in as ambiguous_defs.
+        self.errors = {}  # Errors merged from every process_def_expands call.
+
+    def update_context(self, new_context):
+        """ Fold one tabular file into the summary; new_context holds 'df', 'name', 'sidecar' and 'schema'. """
+        data_input = TabularInput(new_context['df'], sidecar=new_context['sidecar'], name=new_context['name'])
+        df, _ = assemble_hed(data_input, Sidecar(new_context['sidecar']), new_context['schema'],
+                             columns_included=None, expand_defs=True)
+        # The running defs/unresolved are passed in and replaced by what process_def_expands returns.
+        self.defs, self.unresolved, errors = process_def_expands(df['HED_assembled'], new_context['schema'],
+                                                                 known_defs=self.defs, ambiguous_defs=self.unresolved)
+        self.errors.update(errors)
+
+    def _get_summary_details(self, summary):
+        return None  # Stub: no details are produced, whatever summary is passed.
+
+    def _merge_all(self):
+        return None  # Stub: nothing is merged.
+
+    def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+        if name == "Dataset":  # "Dataset" selects the dataset formatter; any other name the individual one.
+            return self._get_dataset_string(result, indent=indent)
+        return self._get_individual_string(name, result, indent=indent)
+
+    @staticmethod
+    def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+        return ""  # Stub: the dataset-level text is always empty.
+
+    @staticmethod
+    def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+        return ""  # Stub: the per-name text is always empty.
diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py
@@ -15,6 +15,7 @@
 from hed.tools.remodeling.operations.split_rows_op import SplitRowsOp
 from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp
 from hed.tools.remodeling.operations.summarize_column_values_op import SummarizeColumnValuesOp
+from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp
 from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import SummarizeSidecarFromEventsOp
 from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp
 from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp
@@ -36,6 +37,7 @@
     'split_rows': SplitRowsOp,
     'summarize_column_names': SummarizeColumnNamesOp,
     'summarize_column_values': SummarizeColumnValuesOp,
+    'summarize_definitions': SummarizeDefinitionsOp,
     'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp,
     'summarize_hed_type': SummarizeHedTypeOp,
     'summarize_hed_tags': SummarizeHedTagsOp,

diff --git a/tests/tools/bids/test_bids_tabular_dictionary.py b/tests/tools/bids/test_bids_tabular_dictionary.py
@@ -131,6 +131,24 @@ def test_report_diffs_diff_rows(self):
         self.assertTrue(output, "report_diffs has differences")
         self.assertTrue(logger.log, "report_diffs the logger is empty before report is called")
 
+    def test_with_tabular_summary(self):
+        """ make_combined_dicts returns a dictionary holding one entry for every BIDS event file found. """
+        from hed.tools.analysis.tabular_summary import TabularSummary
+        # Resolve the dataset relative to this file so the test does not depend on the working directory.
+        bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                                       '../../data/bids_tests/eeg_ds003645s_hed'))
+        skip_columns = ["onset", "duration", "sample", "stim_file", "trial", "response_time"]
+
+        # Construct the file dictionary for the BIDS event files
+        event_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events",
+                                    exclude_dirs=['stimuli'])
+        self.assertTrue(event_files, "no event files were found, so the checks below would pass vacuously")
+        bids_tab = BidsTabularDictionary('eeg_ds003645s_hed', event_files, entities=('sub', 'run'))
+        # Create a summary of the original BIDS events file content
+        _, bids_dicts = TabularSummary.make_combined_dicts(bids_tab.file_dict, skip_cols=skip_columns)
+        self.assertIsInstance(bids_dicts, dict)
+        self.assertEqual(len(bids_dicts), len(event_files))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py
@@ -0,0 +1,60 @@
+import json
+import os
+import unittest
+import pandas as pd
+from hed.models.df_util import get_assembled
+from hed.tools.remodeling.dispatcher import Dispatcher
+from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp, DefinitionSummaryContext
+
+
+class Test(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                             '../../../data/remodel_tests/'))
+        cls.data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv'))
+        cls.json_path = os.path.realpath(os.path.join(path, 'task-FacePerception_events.json'))
+        base_parameters = {
+            "summary_name": 'get_definition_summary',
+            "summary_filename": 'summarize_definitions'
+        }
+        # Stored as JSON text so that each test gets its own copy of the parameters from json.loads.
+        cls.json_parms = json.dumps(base_parameters)
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_constructor(self):
+        """ The constructor accepts the required parameters and raises KeyError for unexpected ones. """
+        parms = json.loads(self.json_parms)
+        sum_op1 = SummarizeDefinitionsOp(parms)
+        self.assertIsInstance(sum_op1, SummarizeDefinitionsOp, "constructor creates an object of the correct type")
+        parms["expand_context"] = ""
+        with self.assertRaises(KeyError) as context:
+            SummarizeDefinitionsOp(parms)
+        self.assertEqual(context.exception.args[0], "BadParameter")
+        parms2 = json.loads(self.json_parms)
+        parms2["mystery"] = True
+        with self.assertRaises(KeyError) as context:
+            SummarizeDefinitionsOp(parms2)
+        self.assertEqual(context.exception.args[0], "BadParameter")
+
+    def test_do_op(self):
+        """ do_op returns a 200 x 10 dataframe and registers a DefinitionSummaryContext under the summary name. """
+        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0'])
+        parms = json.loads(self.json_parms)
+        sum_op = SummarizeDefinitionsOp(parms)
+        self.assertIsInstance(sum_op, SummarizeDefinitionsOp, "constructor creates an object of the correct type")
+        df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null")
+        df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path)
+        self.assertEqual(200, len(df_new), "summarize_definitions_op dataframe length is correct")
+        self.assertEqual(10, len(df_new.columns), "summarize_definitions_op has correct number of columns")
+        self.assertIn(sum_op.summary_name, dispatch.context_dict)
+        self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], DefinitionSummaryContext)
+        # TODO: assert on the collected definitions once DefinitionSummaryContext reports summary details.
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py
@@ -65,18 +65,6 @@ def test_do_op(self):
         df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path)
         self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47)
 
-    def test_quick_test(self):
-        from hed.models.hed_tag import HedTag
-        from hed.schema import load_schema_version
-        my_tag = "Description/This is a test"
-        tag = HedTag(my_tag)
-        x = tag.tag_terms
-        # print(x)
-        my_schema = load_schema_version('8.1.0')
-        tag1 = HedTag(my_tag, hed_schema=my_schema)
-        x1 = tag1.tag_terms
-        # print(x1)
-
     def test_quick3(self):
         from hed.models import TabularInput, Sidecar
         from hed.schema import load_schema_version

diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
@@ -99,7 +99,6 @@ def test_get_summary_text_summary(self):
 
         sum_context1 = dispatch.context_dict[sum_op.summary_name]
         text_sum1 = sum_context1.get_text_summary(individual_summaries="separate")
-        # print(text_sum1)
         sum_op.do_op(dispatch, df, 'subj2_run2', sidecar=self.json_path)
         sum_op.do_op(dispatch, df, 'subj2_run3', sidecar=self.bad_json_path)
         text_sum2 = sum_context1.get_text_summary(individual_summaries="none")