Update set file path override and chunks logic (#1050)
* Update set file path override and chunks logic

* flake8
laemtl authored Mar 15, 2024
1 parent c4e8e22 commit 8b20077
Showing 4 changed files with 64 additions and 50 deletions.
20 changes: 19 additions & 1 deletion python/extract_eeg_bids_archive.py
@@ -11,6 +11,7 @@
from lib.database_lib.config import Config
from lib.exitcode import SUCCESS, BAD_CONFIG_SETTING
from lib.log import Log
import lib.utilities as utilities

__license__ = "GPLv3"

@@ -191,8 +192,25 @@ def main():
if not error:
for modality in modalities:
tmp_eeg_modality_path = os.path.join(tmp_eeg_session_path, modality)
s3_data_dir = config_db_obj.get_config("EEGS3DataPath")

# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
set_files = [
os.path.join(tmp_eeg_modality_path, file)
for file in os.listdir(tmp_eeg_modality_path)
if os.path.splitext(file)[1] == '.set'
]
for set_full_path in set_files:
with_fdt_file = os.path.isfile(set_full_path.replace(".set", ".fdt"))

file_paths_updated = utilities.update_set_file_path_info(set_full_path, with_fdt_file)
if not file_paths_updated:
message = "WARNING: cannot update the set file " \
+ os.path.basename(set_full_path) + " path info"
print(message)

s3_data_dir = config_db_obj.get_config("EEGS3DataPath")
if s3_obj and s3_data_dir and s3_data_dir.startswith('s3://'):
s3_data_eeg_modality_path = os.path.join(s3_data_dir, eeg_session_rel_path, modality)

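For context, the new discovery-and-update step above can be exercised on its own. A minimal sketch, assuming the LORIS python directory is on sys.path (so lib.utilities resolves) and using a hypothetical extracted modality folder /tmp/eeg_session/eeg:

import os

import lib.utilities as utilities  # LORIS utility module used by the script above

tmp_eeg_modality_path = '/tmp/eeg_session/eeg'  # hypothetical extracted modality folder

# collect every EEGLAB .set file present in the modality folder
set_files = [
    os.path.join(tmp_eeg_modality_path, file)
    for file in os.listdir(tmp_eeg_modality_path)
    if os.path.splitext(file)[1] == '.set'
]

for set_full_path in set_files:
    # a .fdt file with the same basename holds the raw EEG data when present
    with_fdt_file = os.path.isfile(set_full_path.replace(".set", ".fdt"))
    if not utilities.update_set_file_path_info(set_full_path, with_fdt_file):
        print("WARNING: cannot update the set file "
              + os.path.basename(set_full_path) + " path info")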
41 changes: 21 additions & 20 deletions python/lib/eeg.py
@@ -513,26 +513,27 @@ def fetch_and_insert_eeg_files(self, derivatives=False, detect=True):
eeg_file_info, eeg_file_data
)

# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
file_paths_updated = file_type != 'set'
if not file_paths_updated:
set_full_path = os.path.join(self.data_dir, eeg_path)
fdt_full_path = eeg_file_data['fdt_file'] if 'fdt_file' in eeg_file_data.keys() else None

if fdt_full_path:
fdt_full_path = os.path.join(self.data_dir, eeg_file_data['fdt_file'])
file_paths_updated = utilities.update_set_file_path_info(set_full_path, fdt_full_path)

if file_paths_updated:
inserted_eegs.append({
'file_id': physio_file_id,
'file_path': eeg_path,
'eegjson_file_path': eegjson_file_path,
'fdt_file_path': fdt_file_path,
'original_file_data': eeg_file,
})
if self.loris_bids_root_dir:
# If we copy the file in assembly_bids and
# if the EEG file was a set file, then update the filename for the .set
# and .fdt files in the .set file so it can find the proper file for
# visualization and analyses
if file_type == 'set':
set_full_path = os.path.join(self.data_dir, eeg_path)
with_fdt_file = True if 'fdt_file' in eeg_file_data.keys() else False

file_paths_updated = utilities.update_set_file_path_info(set_full_path, with_fdt_file)
if not file_paths_updated:
message = "WARNING: cannot update the set file " + eeg_path + " path info"
print(message)

inserted_eegs.append({
'file_id': physio_file_id,
'file_path': eeg_path,
'eegjson_file_path': eegjson_file_path,
'fdt_file_path': fdt_file_path,
'original_file_data': eeg_file,
})

return inserted_eegs

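Each entry appended to inserted_eegs keeps the same keys as before; what changed is that the append no longer depends on the .set path-info update succeeding — a failure now only prints a warning. A sketch of one entry with hypothetical values (the keys come from the block above; the id and paths are made up):

inserted_eeg_entry = {
    'file_id': 1234,  # physio_file_id returned by the insert (hypothetical)
    'file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.set',    # hypothetical relative path
    'eegjson_file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.json',
    'fdt_file_path': 'sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.fdt',
    'original_file_data': {},  # BIDS entry for the source file (placeholder here)
}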
10 changes: 5 additions & 5 deletions python/lib/utilities.py
@@ -159,16 +159,16 @@ def create_archive(files_to_archive, archive_path):
tar.close()


def update_set_file_path_info(set_file, fdt_file):
def update_set_file_path_info(set_file, with_fdt_file):
"""
Updates the path info of the set file with the correct filenames for .set and
.fdt files (for cases that had to be relabelled to include a Visit Label at
the time of import).
:param set_file: complete path of the .set file
:type set_file: str
:param fdt_file: complete path of the .fdt file
:type fdt_file: str
:param with_fdt_file: whether a matching .fdt file exists for the .set file
:type with_fdt_file: bool
"""

# grep the basename without the extension of set_file
@@ -187,7 +187,7 @@ def update_set_file_path_info(set_file, fdt_file):
dataset['setname'] = numpy.array(basename)
if 'EEG' in dataset.keys():
dataset['EEG'][0][0][1] = set_file_name
if fdt_file and 'EEG' in dataset.keys():
if with_fdt_file and 'EEG' in dataset.keys():
dataset['EEG'][0][0][15] = fdt_file_name
dataset['EEG'][0][0][40] = fdt_file_name

@@ -203,7 +203,7 @@ def update_set_file_path_info(set_file, fdt_file):
.format(set_file_name))
return False

if fdt_file:
if with_fdt_file:
if 'datfile' not in dataset.keys() or \
dataset['datfile'] != fdt_file_name:
print('Expected `datfile` field: {}'
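The fields this helper rewrites live in the header of the EEGLAB .set file. A minimal sketch of inspecting them, assuming the .set file is a MATLAB-format file readable with scipy.io (EEGLAB stores .set files as MAT-files); the path below is hypothetical:

import os

import scipy.io

set_file = '/data/bids/sub-01/ses-V1/eeg/sub-01_ses-V1_task-rest_eeg.set'  # hypothetical path
basename = os.path.splitext(os.path.basename(set_file))[0]

# loadmat returns the MAT-file variables as a dict keyed by variable name
dataset = scipy.io.loadmat(set_file)

# fields that update_set_file_path_info rewrites when files were relabelled at import
print(dataset.get('setname'))  # expected to match basename after a successful update
print(dataset.get('datfile'))  # expected to be basename + '.fdt' when with_fdt_file is True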
43 changes: 19 additions & 24 deletions python/react-series-data-viewer/chunking.py
@@ -88,33 +88,20 @@ def write_index_json(
chunk_dir,
time_interval,
series_range,
from_channel_index,
channel_count,
channel_names,
channel_ranges,
channel_metadata,
chunk_size,
downsamplings,
channel_chunks_list,
shapes,
trace_types={}
):
json_dict = OrderedDict([
('timeInterval', list(time_interval)),
('seriesRange', series_range),
('chunkSize', chunk_size),
('downsamplings', list(downsamplings)),
('shapes', [
list(downsampled.shape)
for downsampled in channel_chunks_list
]),
('downsamplings', downsamplings),
('shapes', shapes),
('traceTypes', trace_types),
('channelMetadata', [
{
'name': channel_names[i],
'seriesRange': channel_ranges[i],
'index': from_channel_index + i
}
for i in range(len(channel_ranges))
])
('channelMetadata', channel_metadata)
])
create_path_dirs(chunk_dir)

@@ -216,18 +203,26 @@ def write_chunk_directory(path, chunk_size, loader, from_channel_index=0, from_c
channel_chunks_list, time_interval, signal_range, channel_names, channel_ranges = mne_file_to_chunks(
path, chunk_size, loader, from_channel_name, channel_count
)

if downsamplings is not None:
channel_chunks_list = channel_chunks_list[:downsamplings]

channel_metadata = [
{
'name': channel_names[i],
'seriesRange': channel_ranges[i],
'index': from_channel_index + i
}
for i in range(len(channel_ranges))
]

write_index_json(
chunk_dir,
time_interval,
signal_range,
from_channel_index,
channel_count,
channel_names,
channel_ranges,
channel_metadata,
chunk_size,
range(len(channel_chunks_list)),
channel_chunks_list
list(range(len(channel_chunks_list))),
[list(downsampled.shape) for downsampled in channel_chunks_list]
)
write_chunks(chunk_dir, channel_chunks_list, from_channel_index)
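
With the refactor above, write_chunk_directory precomputes the channel metadata and chunk shapes before calling write_index_json, which now simply serializes what it is given. A sketch of the resulting index structure with made-up values (channel names, ranges, counts, and shapes are hypothetical):

import json
from collections import OrderedDict

channel_metadata = [
    {'name': 'Cz', 'seriesRange': [-41.2, 38.5], 'index': 0},  # hypothetical channel entries
    {'name': 'Pz', 'seriesRange': [-35.7, 40.1], 'index': 1},
]

json_dict = OrderedDict([
    ('timeInterval', [0.0, 600.0]),                              # hypothetical recording span in seconds
    ('seriesRange', [-41.2, 40.1]),
    ('chunkSize', 5000),
    ('downsamplings', [0, 1, 2]),                                # one index per retained downsampling level
    ('shapes', [[2, 120, 5000], [2, 60, 5000], [2, 30, 5000]]),  # hypothetical downsampled chunk shapes
    ('traceTypes', {}),
    ('channelMetadata', channel_metadata),
])

print(json.dumps(json_dict, indent=2))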
