From 6bbf2d9cb56b007766e6d30db533691821e11c74 Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Fri, 31 Mar 2023 07:06:43 -0500
Subject: [PATCH 1/2] Updated the tests

---
 .../tools/bids/test_bids_tabular_dictionary.py | 19 +++++++++++++++++++
 .../operations/test_summarize_hed_tags_op.py   | 12 ------------
 .../test_summarize_hed_validation_op.py        |  1 -
 3 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/tests/tools/bids/test_bids_tabular_dictionary.py b/tests/tools/bids/test_bids_tabular_dictionary.py
index 71ef3cf7f..088fcf6ac 100644
--- a/tests/tools/bids/test_bids_tabular_dictionary.py
+++ b/tests/tools/bids/test_bids_tabular_dictionary.py
@@ -131,6 +131,25 @@ def test_report_diffs_diff_rows(self):
         self.assertTrue(output, "report_diffs has differences")
         self.assertTrue(logger.log, "report_diffs the logger is empty before report is called")
 
+    def test_with_tabular_summary(self):
+        from hed.tools.analysis.tabular_summary import TabularSummary
+        bids_root_path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                                       '../../data/bids_tests/eeg_ds003645s_hed'))
+        name = 'eeg_ds003645s_hed'
+        exclude_dirs = ['stimuli']
+        entities = ('sub', 'run')
+        skip_columns = ["onset", "duration", "sample", "stim_file", "trial", "response_time"]
+
+        # Construct the file dictionary for the BIDS event files
+        event_files = get_file_list(bids_root_path, extensions=[".tsv"], name_suffix="_events",
+                                    exclude_dirs=exclude_dirs)
+        bids_tab = BidsTabularDictionary(name, event_files, entities=entities)
+
+        # Create a summary of the original BIDS events file content
+        bids_dicts_all, bids_dicts = TabularSummary.make_combined_dicts(bids_tab.file_dict, skip_cols=skip_columns)
+        self.assertIsInstance(bids_dicts, dict)
+        self.assertEqual(len(bids_dicts), len(event_files))
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py
index aa3bd4b9c..d5a298202 100644
--- a/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py
+++ b/tests/tools/remodeling/operations/test_summarize_hed_tags_op.py
@@ -65,18 +65,6 @@ def test_do_op(self):
         df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path)
         self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47)
 
-    def test_quick_test(self):
-        from hed.models.hed_tag import HedTag
-        from hed.schema import load_schema_version
-        my_tag = "Description/This is a test"
-        tag = HedTag(my_tag)
-        x = tag.tag_terms
-        # print(x)
-        my_schema = load_schema_version('8.1.0')
-        tag1 = HedTag(my_tag, hed_schema=my_schema)
-        x1 = tag1.tag_terms
-        # print(x1)
-
     def test_quick3(self):
         from hed.models import TabularInput, Sidecar
         from hed.schema import load_schema_version
diff --git a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
index e6ae19944..0136c205e 100644
--- a/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
+++ b/tests/tools/remodeling/operations/test_summarize_hed_validation_op.py
@@ -99,7 +99,6 @@ def test_get_summary_text_summary(self):
 
         sum_context1 = dispatch.context_dict[sum_op.summary_name]
         text_sum1 = sum_context1.get_text_summary(individual_summaries="separate")
-        # print(text_sum1)
         sum_op.do_op(dispatch, df, 'subj2_run2', sidecar=self.json_path)
         sum_op.do_op(dispatch, df, 'subj2_run3', sidecar=self.bad_json_path)
         text_sum2 = sum_context1.get_text_summary(individual_summaries="none")

From 4d6d35e31c5529630e07b11a129d03ab722ae57d Mon Sep 17 00:00:00 2001
From: Kay Robbins <1189050+VisLab@users.noreply.github.com>
Date: Fri, 31 Mar 2023 17:21:59 -0500
Subject: [PATCH 2/2] First stab at Definitions summary

---
 .../operations/summarize_definitions_op.py    | 118 ++++++++++++++++++
 .../remodeling/operations/valid_operations.py |   2 +
 .../test_summarize_definitions_op.py          |  60 +++++++++
 3 files changed, 180 insertions(+)
 create mode 100644 hed/tools/remodeling/operations/summarize_definitions_op.py
 create mode 100644 tests/tools/remodeling/operations/test_summarize_definitions_op.py

diff --git a/hed/tools/remodeling/operations/summarize_definitions_op.py b/hed/tools/remodeling/operations/summarize_definitions_op.py
new file mode 100644
index 000000000..26f9b7ab6
--- /dev/null
+++ b/hed/tools/remodeling/operations/summarize_definitions_op.py
@@ -0,0 +1,118 @@
+""" Summarize the HED definitions used in a tabular file. """
+
+from hed import DefinitionDict, TabularInput, Sidecar
+from hed.models.df_util import process_def_expands
+from hed.tools.analysis.analysis_util import assemble_hed
+from hed.tools.remodeling.operations.base_op import BaseOp
+from hed.tools.remodeling.operations.base_context import BaseContext
+
+
+class SummarizeDefinitionsOp(BaseOp):
+    """ Summarize the HED definitions used in a tabular file.
+
+    Required remodeling parameters:
+        - **summary_name** (*str*): The name of the summary.
+        - **summary_filename** (*str*): Base filename of the summary.
+
+    The purpose is to gather the definitions recovered from the assembled HED strings of each file.
+
+    """
+
+    PARAMS = {
+        "operation": "summarize_definitions",
+        "required_parameters": {
+            "summary_name": str,
+            "summary_filename": str
+        },
+        "optional_parameters": {
+        }
+    }
+
+    SUMMARY_TYPE = 'definitions'
+
+    def __init__(self, parameters):
+        """ Constructor for the summarize definitions operation.
+
+        Parameters:
+            parameters (dict): Dictionary with the parameter values for required and optional parameters.
+
+        Raises:
+
+            KeyError
+                - If a required parameter is missing.
+                - If an unexpected parameter is provided.
+
+            TypeError
+                - If a parameter has the wrong type.
+
+        """
+
+        super().__init__(self.PARAMS, parameters)
+        self.summary_name = parameters['summary_name']
+        self.summary_filename = parameters['summary_filename']
+
+    def do_op(self, dispatcher, df, name, sidecar=None):
+        """ Update the definition summary context with the contents of this tabular file.
+
+        Parameters:
+            dispatcher (Dispatcher): Manages the operation I/O.
+            df (DataFrame): The DataFrame to be summarized.
+            name (str): Unique identifier for the dataframe -- often the original file path.
+            sidecar (Sidecar or file-like): Only needed for HED operations.
+
+        Returns:
+            DataFrame: The same DataFrame that was passed in.
+
+        Side-effect:
+            Updates the context.
+
+        """
+
+        summary = dispatcher.context_dict.get(self.summary_name, None)
+        if not summary:
+            summary = DefinitionSummaryContext(self)
+            dispatcher.context_dict[self.summary_name] = summary
+        summary.update_context({'df': dispatcher.post_proc_data(df), 'name': name, 'sidecar': sidecar,
+                                'schema': dispatcher.hed_schema})
+        return df
+
+
+class DefinitionSummaryContext(BaseContext):
+    """ Holds the definitions, unresolved definitions, and errors accumulated over the files summarized. """
+
+    def __init__(self, sum_op):
+        super().__init__(sum_op.SUMMARY_TYPE, sum_op.summary_name, sum_op.summary_filename)
+        self.defs = DefinitionDict()
+        self.unresolved = {}
+        self.errors = {}
+
+    def update_context(self, new_context):
+        """ Update defs, unresolved and errors from one file given 'df', 'name', 'sidecar' and 'schema' keys. """
+        name = new_context['name']
+        data_input = TabularInput(new_context['df'], sidecar=new_context['sidecar'], name=name)
+        sidecar = Sidecar(new_context['sidecar'])
+        df, _ = assemble_hed(data_input, sidecar, new_context['schema'],
+                             columns_included=None, expand_defs=True)
+        hed_strings = df['HED_assembled']
+        self.defs, self.unresolved, errors = process_def_expands(hed_strings, new_context['schema'],
+                                                                 known_defs=self.defs, ambiguous_defs=self.unresolved)
+        self.errors.update(errors)
+
+    def _get_summary_details(self, summary):
+        return None
+
+    def _merge_all(self):
+        return None
+
+    def _get_result_string(self, name, result, indent=BaseContext.DISPLAY_INDENT):
+        if name == "Dataset":
+            return self._get_dataset_string(result, indent=indent)
+        return self._get_individual_string(name, result, indent=indent)
+
+    @staticmethod
+    def _get_dataset_string(result, indent=BaseContext.DISPLAY_INDENT):
+        return ""
+
+    @staticmethod
+    def _get_individual_string(name, result, indent=BaseContext.DISPLAY_INDENT):
+        return ""
diff --git a/hed/tools/remodeling/operations/valid_operations.py b/hed/tools/remodeling/operations/valid_operations.py
index d00391270..8753ed1d6 100644
--- a/hed/tools/remodeling/operations/valid_operations.py
+++ b/hed/tools/remodeling/operations/valid_operations.py
@@ -15,6 +15,7 @@
 from hed.tools.remodeling.operations.split_rows_op import SplitRowsOp
 from hed.tools.remodeling.operations.summarize_column_names_op import SummarizeColumnNamesOp
 from hed.tools.remodeling.operations.summarize_column_values_op import SummarizeColumnValuesOp
+from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp
 from hed.tools.remodeling.operations.summarize_sidecar_from_events_op import SummarizeSidecarFromEventsOp
 from hed.tools.remodeling.operations.summarize_hed_type_op import SummarizeHedTypeOp
 from hed.tools.remodeling.operations.summarize_hed_tags_op import SummarizeHedTagsOp
@@ -36,6 +37,7 @@
     'split_rows': SplitRowsOp,
     'summarize_column_names': SummarizeColumnNamesOp,
     'summarize_column_values': SummarizeColumnValuesOp,
+    'summarize_definitions': SummarizeDefinitionsOp,
     'summarize_sidecar_from_events': SummarizeSidecarFromEventsOp,
     'summarize_hed_type': SummarizeHedTypeOp,
     'summarize_hed_tags': SummarizeHedTagsOp,
diff --git a/tests/tools/remodeling/operations/test_summarize_definitions_op.py b/tests/tools/remodeling/operations/test_summarize_definitions_op.py
new file mode 100644
index 000000000..b5100a652
--- /dev/null
+++ b/tests/tools/remodeling/operations/test_summarize_definitions_op.py
@@ -0,0 +1,60 @@
+import json
+import os
+import unittest
+import pandas as pd
+from hed.models.df_util import get_assembled
+from hed.tools.remodeling.dispatcher import Dispatcher
+from hed.tools.remodeling.operations.summarize_definitions_op import SummarizeDefinitionsOp, DefinitionSummaryContext
+
+
+class Test(unittest.TestCase):
+
+    @classmethod
+    def setUpClass(cls):
+        path = os.path.realpath(os.path.join(os.path.dirname(os.path.realpath(__file__)),
+                                             '../../../data/remodel_tests/'))
+        cls.data_path = os.path.realpath(os.path.join(path, 'sub-002_task-FacePerception_run-1_events.tsv'))
+        cls.json_path = os.path.realpath(os.path.join(path, 'task-FacePerception_events.json'))
+        base_parameters = {
+            "summary_name": 'get_definition_summary',
+            "summary_filename": 'summarize_definitions'
+        }
+        cls.json_parms = json.dumps(base_parameters)
+
+    @classmethod
+    def tearDownClass(cls):
+        pass
+
+    def test_constructor(self):
+        parms = json.loads(self.json_parms)
+        sum_op1 = SummarizeDefinitionsOp(parms)
+        self.assertIsInstance(sum_op1, SummarizeDefinitionsOp, "constructor creates an object of the correct type")
+        parms["expand_context"] = ""
+        with self.assertRaises(KeyError) as context:
+            SummarizeDefinitionsOp(parms)
+        self.assertEqual(context.exception.args[0], "BadParameter")
+        parms2 = json.loads(self.json_parms)
+        parms2["mystery"] = True
+        with self.assertRaises(KeyError) as context:
+            SummarizeDefinitionsOp(parms2)
+        self.assertEqual(context.exception.args[0], "BadParameter")
+
+    def test_do_op(self):
+        dispatch = Dispatcher([], data_root=None, backup_name=None, hed_versions=['8.1.0'])
+        parms = json.loads(self.json_parms)
+        sum_op = SummarizeDefinitionsOp(parms)
+        self.assertIsInstance(sum_op, SummarizeDefinitionsOp, "constructor creates an object of the correct type")
+        df = pd.read_csv(self.data_path, delimiter='\t', header=0, keep_default_na=False, na_values=",null")
+        df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run1', sidecar=self.json_path)
+        self.assertEqual(200, len(df_new), "summarize_definitions_op dataframe length is correct")
+        self.assertEqual(10, len(df_new.columns), "summarize_definitions_op has correct number of columns")
+        self.assertIn(sum_op.summary_name, dispatch.context_dict)
+        self.assertIsInstance(dispatch.context_dict[sum_op.summary_name], DefinitionSummaryContext)
+        # x = dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run1']
+        # self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run1'].tag_dict), 47)
+        # df_new = sum_op.do_op(dispatch, dispatch.prep_data(df), 'subj2_run2', sidecar=self.json_path)
+        # self.assertEqual(len(dispatch.context_dict[sum_op.summary_name].summary_dict['subj2_run2'].tag_dict), 47)
+
+
+if __name__ == '__main__':
+    unittest.main()