Update set file path override and chunks logic (#1050)
* Update set file path override and chunks logic

* flake8
laemtl authored Mar 15, 2024
1 parent c4e8e22 commit 8b20077
Showing 4 changed files with 64 additions and 50 deletions.
20 changes: 19 additions & 1 deletion python/extract_eeg_bids_archive.py
@@ -11,6 +11,7 @@
from lib.database_lib.config import Config
from lib.exitcode import SUCCESS, BAD_CONFIG_SETTING
from lib.log import Log
import lib.utilities as utilities

__license__ = "GPLv3"

@@ -191,8 +192,25 @@ def main():
if not error:
for modality in modalities:
tmp_eeg_modality_path = os.path.join(tmp_eeg_session_path, modality)
s3_data_dir = config_db_obj.get_config("EEGS3DataPath")

# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
set_files = [
os.path.join(tmp_eeg_modality_path, file)
for file in os.listdir(tmp_eeg_modality_path)
if os.path.splitext(file)[1] == '.set'
]
for set_full_path in set_files:
with_fdt_file = os.path.isfile(set_full_path.replace(".set", ".fdt"))

file_paths_updated = utilities.update_set_file_path_info(set_full_path, with_fdt_file)
if not file_paths_updated:
message = "WARNING: cannot update the set file " \
+ os.path.basename(set_full_path) + " path info"
print(message)

s3_data_dir = config_db_obj.get_config("EEGS3DataPath")
if s3_obj and s3_data_dir and s3_data_dir.startswith('s3://'):
s3_data_eeg_modality_path = os.path.join(s3_data_dir, eeg_session_rel_path, modality)

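For context, the new discovery-and-update step above can be exercised on its own. A minimal sketch, assuming the LORIS python directory is on sys.path (so lib.utilities resolves) and using a hypothetical extracted modality folder /tmp/eeg_session/eeg:

import os

import lib.utilities as utilities  # LORIS utility module used by the script above

tmp_eeg_modality_path = '/tmp/eeg_session/eeg'  # hypothetical extracted modality folder

# collect every EEGLAB .set file present in the modality folder
set_files = [
    os.path.join(tmp_eeg_modality_path, file)
    for file in os.listdir(tmp_eeg_modality_path)
    if os.path.splitext(file)[1] == '.set'
]

for set_full_path in set_files:
    # a .fdt file with the same basename holds the raw EEG data when present
    with_fdt_file = os.path.isfile(set_full_path.replace(".set", ".fdt"))
    if not utilities.update_set_file_path_info(set_full_path, with_fdt_file):
        print("WARNING: cannot update the set file "
              + os.path.basename(set_full_path) + " path info")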
41 changes: 21 additions & 20 deletions python/lib/eeg.py
@@ -513,26 +513,27 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
eeg_file_info, eeg_file_data
)

# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
file_paths_updated = file_type != 'set'
if not file_paths_updated:
set_full_path = os.path.join(self.data_dir, eeg_path)
fdt_full_path = eeg_file_data['fdt_file'] if 'fdt_file' in eeg_file_data.keys() else None

if fdt_full_path:
fdt_full_path = os.path.join(self.data_dir, eeg_file_data['fdt_file'])
file_paths_updated = utilities.update_set_file_path_info(set_full_path, fdt_full_path)

if file_paths_updated:
inserted_eegs.append({
'file_id': physio_file_id,
'file_path': eeg_path,
'eegjson_file_path': eegjson_file_path,
'fdt_file_path': fdt_file_path,
'original_file_data': eeg_file,
})
if self.loris_bids_root_dir:
# If we copy the file in assembly_bids and
# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
if file_type == 'set':
set_full_path = os.path.join(self.data_dir, eeg_path)
with_fdt_file = True if 'fdt_file' in eeg_file_data.keys() else False

file_paths_updated = utilities.update_set_file_path_info(set_full_path, with_fdt_file)
if not file_paths_updated:
message = "WARNING: cannot update the set file " + eeg_path + " path info"
print(message)

inserted_eegs.append({
'file_id': physio_file_id,
'file_path': eeg_path,
'eegjson_file_path': eegjson_file_path,
'fdt_file_path': fdt_file_path,
'original_file_data': eeg_file,
})

return inserted_eegs

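Each entry appended to inserted_eegs keeps the same keys as before; what changed is that the append no longer depends on the .set path-info update succeeding — a failure now only prints a warning. A sketch of one entry with hypothetical values (the keys come from the block above; the id and paths are made up):

inserted_eeg_entry = {
    'file_id': 1234,  # physio_file_id returned by the insert (hypothetical)
    'file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.set',    # hypothetical relative path
    'eegjson_file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.json',
    'fdt_file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.fdt',
    'original_file_data': {},  # BIDS entry for the source file (placeholder here)
}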
10 changes: 5 additions & 5 deletions python/lib/utilities.py
@@ -159,16 +159,16 @@ def create_archive(files_to_archive, archive_path):
tar.close()


def update_set_file_path_info(set_file, fdt_file):
def update_set_file_path_info(set_file, with_fdt_file):
"""
Updates the path info of the set file with the correct filenames for .set and
.fdt files (for cases that had to be relabelled to include a Visit Label at
the time of import).
:param set_file: complete path of the .set file
:type set_file: str
:param fdt_file: complete path of the .fdt file
:type fdt_file: str
:param with_fdt_file: whether a matching .fdt file exists for the .set file
:type with_fdt_file: bool
"""

# grep the basename without the extension of set_file
@@ -187,7 +187,7 @@ def update_set_file_path_info(set_file, fdt_file):
dataset['setname'] = numpy.array(basename)
if 'EEG' in dataset.keys():
dataset['EEG'][0][0][1] = set_file_name
if fdt_file and 'EEG' in dataset.keys():
if with_fdt_file and 'EEG' in dataset.keys():
dataset['EEG'][0][0][15] = fdt_file_name
dataset['EEG'][0][0][40] = fdt_file_name

@@ -203,7 +203,7 @@ def update_set_file_path_info(set_file, fdt_file):
.format(set_file_name))
return False

if fdt_file:
if with_fdt_file:
if 'datfile' not in dataset.keys() or \
dataset['datfile'] != fdt_file_name:
print('Expected `datfile` field: {}'
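The fields this helper rewrites live in the header of the EEGLAB .set file. A minimal sketch of inspecting them, assuming the .set file is a MATLAB-format file readable with scipy.io (EEGLAB stores .set files as MAT-files); the path below is hypothetical:

import os

import scipy.io

set_file = '/data/bids/sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.set'  # hypothetical path
basename = os.path.splitext(os.path.basename(set_file))[0]

# loadmat returns the MAT-file variables as a dict keyed by variable name
dataset = scipy.io.loadmat(set_file)

# fields that update_set_file_path_info rewrites when files were relabelled at import
print(dataset.get('setname'))  # expected to match basename after a successful update
print(dataset.get('datfile'))  # expected to be basename + '.fdt' when with_fdt_file is True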
43 changes: 19 additions & 24 deletions python/react-series-data-viewer/chunking.py
@@ -88,33 +88,20 @@ def write_index_json(
chunk_dir,
time_interval,
series_range,
from_channel_index,
channel_count,
channel_names,
channel_ranges,
channel_metadata,
chunk_size,
downsamplings,
channel_chunks_list,
shapes,
trace_types={}
):
json_dict = OrderedDict([
('timeInterval', list(time_interval)),
('seriesRange', series_range),
('chunkSize', chunk_size),
('downsamplings', list(downsamplings)),
('shapes', [
list(downsampled.shape)
for downsampled in channel_chunks_list
]),
('downsamplings', downsamplings),
('shapes', shapes),
('traceTypes', trace_types),
('channelMetadata', [
{
'name': channel_names[i],
'seriesRange': channel_ranges[i],
'index': from_channel_index + i
}
for i in range(len(channel_ranges))
])
('channelMetadata', channel_metadata)
])
create_path_dirs(chunk_dir)

@@ -216,18 +203,26 @@ def write_chunk_directory(path, chunk_size, loader, from_channel_index=0, from_c
channel_chunks_list, time_interval, signal_range, channel_names, channel_ranges = mne_file_to_chunks(
path, chunk_size, loader, from_channel_name, channel_count
)

if downsamplings is not None:
channel_chunks_list = channel_chunks_list[:downsamplings]

channel_metadata = [
{
'name': channel_names[i],
'seriesRange': channel_ranges[i],
'index': from_channel_index + i
}
for i in range(len(channel_ranges))
]

write_index_json(
chunk_dir,
time_interval,
signal_range,
from_channel_index,
channel_count,
channel_names,
channel_ranges,
channel_metadata,
chunk_size,
range(len(channel_chunks_list)),
channel_chunks_list
list(range(len(channel_chunks_list))),
[list(downsampled.shape) for downsampled in channel_chunks_list]
)
write_chunks(chunk_dir, channel_chunks_list, from_channel_index)
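
With the refactor above, write_chunk_directory precomputes the channel metadata and chunk shapes before calling write_index_json, which now simply serializes what it is given. A sketch of the resulting index structure with made-up values (channel names, ranges, counts, and shapes are hypothetical):

import json
from collections import OrderedDict

channel_metadata = [
    {'name': 'Cz', 'seriesRange': [-41.2, 38.5], 'index': 0},  # hypothetical channel entries
    {'name': 'Pz', 'seriesRange': [-35.7, 40.1], 'index': 1},
]

json_dict = OrderedDict([
    ('timeInterval', [0.0, 600.0]),                              # hypothetical recording span in seconds
    ('seriesRange', [-41.2, 40.1]),
    ('chunkSize', 5000),
    ('downsamplings', [0, 1, 2]),                                # one index per retained downsampling level
    ('shapes', [[2, 120, 5000], [2, 60, 5000], [2, 30, 5000]]),  # hypothetical downsampled chunk shapes
    ('traceTypes', {}),
    ('channelMetadata', channel_metadata),
])

print(json.dumps(json_dict, indent=2))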
