From 70858d028d7c60c605bce2837dc2b3759522984f Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Fri, 13 May 2022 12:47:27 -0400
Subject: [PATCH 1/5] example illustrating data preprocessing of fMRIprep
 outputs for use in GLMsingle

---
 examples/example_bids_datasets.py | 174 ++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100755 examples/example_bids_datasets.py

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
new file mode 100755
index 0000000..646f385
--- /dev/null
+++ b/examples/example_bids_datasets.py
@@ -0,0 +1,174 @@
+#!/home/rami/anaconda3/bin/python3
+# Import libraries
+
+import os
+import numpy as np
+import nibabel as nib
+import pandas as pd
+from bids import BIDSLayout
+from glmsingle.glmsingle import GLM_single
+
+def grab_data(root):
+    ''' Use pybids to easily iter over dataset generated using fMRIprep. '''
+    # Load data
+    raw = BIDSLayout(os.path.join(root,'rawdata'))
+    processed = BIDSLayout(os.path.join(root,'derivatives','iter4'))
+    # Print summary data
+    print(raw)
+    print(processed)
+    # Get data per run
+    bold_info = raw.get(extension='.nii.gz', suffix='bold', return_type='filename')[0]
+    metadata = raw.get_metadata(bold_info)
+    slicedata = nib.load(bold_info).get_fdata().astype(np.float32)
+    ''' To make an event file GLMsingle compatible, calculate total
+    acquisition time rounded to nearest second. Total time will represent
+    number of rows in new event file. '''
+    # Print run data
+    tr = metadata['RepetitionTime']
+    num_slices = slicedata.shape[3]
+    total_dur = round(tr * num_slices)
+    print('TR:', tr, '\n Number of slices acquired:', num_slices,
+          '\n Total run duration:', total_dur)
+    return raw, processed, tr, num_slices, total_dur
+
+def make_new_df(root, raw, total_dur):
+    ''' Make an empty dataframe, # of rows is equal to total run duration (seconds),
+    because we will upsample the TR to 1s to reduce jitter. # of columns equal to
+    total unique event types in .tsv file. In this case 3 unique event types exist,
+    as we ignore the event type representing intertrial intervals (ITIs). '''
+    # Grab event files
+    eventfiles = raw.get(extension='.tsv', suffix='events', return_type='filename')
+    # Load into pandas
+    for eventfile in eventfiles:
+        design = pd.read_csv(eventfile, sep='\t')
+        # Parse each event type, as indicated by value, where each integer represents a different event type
+        CSplus_shock = design.loc[design['value'] == 3]
+        CSplus = design.loc[design['value'] == 1]
+        CSminus = design.loc[design['value'] == 2]
+        # Make new df
+        new_design = pd.DataFrame(columns = ['TR', 'CSplusshock','CSplus','CSminus'])
+        new_design['TR'] = [i for i in range(1, (total_dur+1))]
+        # For each stimulus type, mark the onset of the stimulus with 1. All other values should be 0
+        for cs in [CSplus_shock, CSplus, CSminus]:
+            for i in range(0, len(cs.index)):
+                tr_start = round(cs.iloc[i,0])
+                if cs is CSplus_shock:
+                    new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0]
+                elif cs is CSplus:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 1, 0]
+                elif cs is CSminus:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 0, 1]
+                else:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 0, 0]
+        # Fill NaNs with 0s
+        new_design.fillna(0, inplace=True)
+        # Give the event file a new name
+        new_name = raw.parse_file_entities(eventfile)
+        new_name['suffix'] = 'events_glmsingle'
+        pattern = 'sub-{}_ses-{}_task-{}_run-{}_{}.csv'
+        # Save the event file in derivatives, exclude TR column
+        new_design.to_csv(os.path.join(root, 'derivatives', 'iter4',
+            'sub-'+new_name['subject'], 'ses-'+new_name['session'], 'func',
+            pattern.format(new_name['subject'],
+            new_name['session'], new_name['task'], new_name['run'],
+            new_name['suffix'])), columns=['CSplusshock','CSplus','CSminus'],
+            index=False, header=False)
+        print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run'])
+def upsample_bold(processed, tr, tr_new, num_slices):
+    ''' Upsample task-based fMRI data to 1s using pyslicetime scripts:
+    https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py.
+    Output will have suffix _stc appended.'''
+    from slicetime.nipype_interface import SliceTime
+    # Grab all preprocessed runs
+    bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    # Import parameters
+    tr_old = tr
+    tr_new = tr_new
+    num_slices = num_slices
+    # sliceorder needs to be 1-based (see fakeout below)
+    slicetimes = np.flip(np.arange(0, tr_old, tr_old/num_slices, dtype=object)).tolist()
+    # Iter over runs
+    for run in bold_runs:
+        print('Upsampling:', run)
+        st = SliceTime()
+        st.inputs.in_file = run
+        st.inputs.tr_old = tr
+        st.inputs.tr_new = tr_new
+        st.inputs.slicetimes = slicetimes
+        res = st.run()
+
+def smooth_bold(processed, smooth_num):
+    ''' Smooth data prior to first-level analysis using FSL in nipype.
+    Output will have suffix _smooth appended.'''
+    from nipype.interfaces.fsl import Smooth
+    # Grab all upsampled runs
+    bold_runs = processed.get(extension='.nii.gz', suffix='stc', return_type='filename')
+    # Smooth data before inputting to GLMsingle
+    for run in bold_runs:
+        print('Smoothing:', os.path.basename(run))
+        path = os.path.dirname(run)
+        os.chdir(path)
+        sm = Smooth()
+        sm.inputs.output_type = 'NIFTI_GZ'
+        sm.inputs.in_file = run
+        sm.inputs.fwhm = smooth_num
+        res = sm.run()
+
+def run_1st_level(processed, stimdur, tr_new):
+    ''' Input several runs per subject to perform cross-validation
+    in GLMdenoise (step 3 of GLMsingle). Imaging data and event data
+    should be appended into separate lists. '''
+    # Get all subjects
+    subjects = processed.get(return_type='id', target='subject')
+    data = []
+    design = []
+    # Loop over subjects
+    for sub in subjects:
+        # Grab all upsampled and smoothed runs
+        bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='smooth', return_type='filename')
+        # Grab all GLMsingle-compatible event files
+        eventfiles = processed.get(subject=subjects, extension='.csv', suffix='glmsingle', return_type='filename')
+        # Append all runs for a single subject session
+        # fMRI data
+        for run in bold_runs:
+            r = nib.load(run).get_fdata().astype(np.float32)
+            data.append(r)
+        # design matrices
+        for event in eventfiles:
+            ev = pd.read_csv(event, header=None).to_numpy(dtype='float32')
+            design.append(ev)
+        # Initialize model
+        gs = GLM_single()
+        # Name output folders
+        metadata = processed.parse_file_entities(bold_runs[0])
+        outdir = os.path.join(os.path.dirname(bold_runs[0]), metadata['task'] + '_concat_glmsingle')
+        # Run GLMsingle
+        results = gs.fit(
+            design,
+            data,
+            stimdur,
+            tr_new,
+            outputdir=outdir)
+        # Clear data
+        data.clear()
+        design.clear()
+
+def main():
+    # Hardcoded file path to existing BIDS dataset
+    root = '/home/rami/test/'
+    # Stimulus duration
+    stimdur = 4
+    # Desired repetition time
+    tr_new = 1
+    # Desired smoothing
+    smooth_num = 6
+    # Execute functions
+    ''' Can comment out intermediate steps if already complete. '''
+    raw, processed, tr, num_slices, total_dur = grab_data(root)
+    make_new_df(root, raw, total_dur)
+    upsample_bold(processed, tr, tr_new, num_slices)
+    smooth_bold(processed, smooth_num)
+    run_1st_level(processed, stimdur, tr_new)
+if __name__ == "__main__":
+    # execute only if run as a script
+    main()
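Note: the design-matrix convention that make_new_df() builds toward is a time-by-conditions matrix per run, with a 1 in the row of each stimulus onset and 0 everywhere else, saved as a headerless CSV. A minimal self-contained sketch of the same idea (the toy onsets, condition codes, and run length below are made up for illustration, not values from this dataset):

    import numpy as np
    import pandas as pd

    # Toy events table in the spirit of the raw .tsv files:
    # onset in seconds, integer code identifying the event type
    events = pd.DataFrame({'onset': [12.3, 40.8, 77.1], 'value': [1, 2, 3]})
    total_dur = 100  # run length in seconds = rows after upsampling to a 1 s TR

    design = np.zeros((total_dur, 3), dtype=np.float32)
    for onset, value in zip(events['onset'], events['value']):
        design[int(round(onset)), value - 1] = 1  # one column per condition

The rounding mirrors the script's snapping of onsets to the nearest 1 s bin; GLMsingle then receives one such matrix per run alongside the 4D data.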
From f617581be5a13b43ebdd580f8726cd0c2d9466a9 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 18 May 2022 10:32:23 -0400
Subject: [PATCH 2/5] added filter if multiple resolutions exist

---
 examples/example_bids_datasets.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 646f385..305fa15 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -81,6 +81,8 @@ def upsample_bold(processed, tr, tr_new, num_slices):
     from slicetime.nipype_interface import SliceTime
     # Grab all preprocessed runs
     bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    # Filter for resolution - here we exclude res-2
+    bold_runsf = [b for b in bold_runs if 'res-2' not in b]
     # Import parameters
     tr_old = tr
     tr_new = tr_new
@@ -88,7 +90,7 @@ def upsample_bold(processed, tr, tr_new, num_slices):
     # sliceorder needs to be 1-based (see fakeout below)
     slicetimes = np.flip(np.arange(0, tr_old, tr_old/num_slices, dtype=object)).tolist()
     # Iter over runs
-    for run in bold_runs:
+    for run in bold_runsf:
         print('Upsampling:', run)
         st = SliceTime()
         st.inputs.in_file = run
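Note: the list comprehension above filters on the raw filename, so 'res-2' anywhere in the path would exclude a run. A slightly stricter sketch that matches the string only as a delimited BIDS res- entity (the filenames here are hypothetical, just to make the snippet runnable):

    import re

    bold_runs = ['sub-01_task-fear_res-1_desc-preproc_bold.nii.gz',
                 'sub-01_task-fear_res-2_desc-preproc_bold.nii.gz']

    def has_entity(filename, key, value):
        # Match key-value only as a whole entity, e.g. '_res-2_' or '_res-2.'
        return re.search(rf'[_/]{key}-{value}(?=[_.])', filename) is not None

    bold_runsf = [b for b in bold_runs if not has_entity(b, 'res', '2')]
    print(bold_runsf)  # keeps only the res-1 run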
From 13310498df50a40732467a439de023803b868710 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 18 May 2022 11:47:06 -0400
Subject: [PATCH 3/5] hardcoded subjects list

---
 examples/example_bids_datasets.py | 33 +++++++++++++++++--------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 305fa15..8083aaf 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -1,4 +1,4 @@
-#!/home/rami/anaconda3/bin/python3
+#!/group/tuominen/anaconda3/bin/python3
 # Import libraries
 
 import os
@@ -31,13 +31,13 @@ def grab_data(root):
           '\n Total run duration:', total_dur)
     return raw, processed, tr, num_slices, total_dur
 
-def make_new_df(root, raw, total_dur):
+def make_new_df(root, raw, subjects, total_dur):
     ''' Make an empty dataframe, # of rows is equal to total run duration (seconds),
     because we will upsample the TR to 1s to reduce jitter. # of columns equal to
     total unique event types in .tsv file. In this case 3 unique event types exist,
     as we ignore the event type representing intertrial intervals (ITIs). '''
     # Grab event files
-    eventfiles = raw.get(extension='.tsv', suffix='events', return_type='filename')
+    eventfiles = raw.get(subject=subjects, extension='.tsv', suffix='events', return_type='filename')
     # Load into pandas
     for eventfile in eventfiles:
         design = pd.read_csv(eventfile, sep='\t')
@@ -51,7 +51,7 @@ def make_new_df(root, raw, total_dur):
         # For each stimulus type, mark the onset of the stimulus with 1. All other values should be 0
         for cs in [CSplus_shock, CSplus, CSminus]:
             for i in range(0, len(cs.index)):
-                tr_start = round(cs.iloc[i,0])
+                tr_start = int(round(cs.iloc[i,0]))
                 if cs is CSplus_shock:
                     new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0]
                 elif cs is CSplus:
@@ -74,13 +74,13 @@ def make_new_df(root, raw, total_dur):
             new_name['suffix'])), columns=['CSplusshock','CSplus','CSminus'],
             index=False, header=False)
         print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run'])
-def upsample_bold(processed, tr, tr_new, num_slices):
+def upsample_bold(processed, tr, tr_new, num_slices, subjects):
     ''' Upsample task-based fMRI data to 1s using pyslicetime scripts:
     https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py.
     Output will have suffix _stc appended.'''
     from slicetime.nipype_interface import SliceTime
     # Grab all preprocessed runs
-    bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='bold', return_type='filename')
     # Filter for resolution - here we exclude res-2
     bold_runsf = [b for b in bold_runs if 'res-2' not in b]
     # Import parameters
     tr_old = tr
@@ -99,12 +99,12 @@ def upsample_bold(processed, tr, tr_new, num_slices):
         st.inputs.slicetimes = slicetimes
         res = st.run()
 
-def smooth_bold(processed, smooth_num):
+def smooth_bold(processed, smooth_num, subjects):
     ''' Smooth data prior to first-level analysis using FSL in nipype.
     Output will have suffix _smooth appended.'''
     from nipype.interfaces.fsl import Smooth
     # Grab all upsampled runs
-    bold_runs = processed.get(extension='.nii.gz', suffix='stc', return_type='filename')
+    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='stc', return_type='filename')
     # Smooth data before inputting to GLMsingle
     for run in bold_runs:
         print('Smoothing:', os.path.basename(run))
@@ -116,12 +116,11 @@ def smooth_bold(processed, smooth_num):
         sm.inputs.fwhm = smooth_num
         res = sm.run()
 
-def run_1st_level(processed, stimdur, tr_new):
+def run_1st_level(processed, stimdur, tr_new, subjects):
     ''' Input several runs per subject to perform cross-validation
     in GLMdenoise (step 3 of GLMsingle). Imaging data and event data
     should be appended into separate lists. '''
     # Get all subjects
-    subjects = processed.get(return_type='id', target='subject')
     data = []
     design = []
     # Loop over subjects
@@ -157,20 +156,24 @@ def run_1st_level(processed, stimdur, tr_new):
 
 def main():
     # Hardcoded file path to existing BIDS dataset
-    root = '/home/rami/test/'
+    root = '/group/tuominen/EmoSal_BIDS'
     # Stimulus duration
     stimdur = 4
     # Desired repetition time
     tr_new = 1
     # Desired smoothing
     smooth_num = 6
+    # Desired subjects
+    subjects = ['avl003', 'avl004', 'avl005', 'avl006', 'avl007', 'avl009', 'avl010',
+    'avl011', 'avl012', 'avl-013r', 'avl014', 'avl016', 'avl017', 'avl018', 'avl019',
+    'avl021', 'avl022', 'avl024', 'avl025', 'avl027', 'avl028', 'avl200', 'avl201']
     # Execute functions
     ''' Can comment out intermediate steps if already complete. '''
     raw, processed, tr, num_slices, total_dur = grab_data(root)
-    make_new_df(root, raw, total_dur)
-    upsample_bold(processed, tr, tr_new, num_slices)
-    smooth_bold(processed, smooth_num)
-    run_1st_level(processed, stimdur, tr_new)
+    make_new_df(root, raw, subjects, total_dur)
+    upsample_bold(processed, tr, tr_new, num_slices, subjects)
+    smooth_bold(processed, smooth_num, subjects)
+    run_1st_level(processed, stimdur, tr_new, subjects)
 if __name__ == "__main__":
     # execute only if run as a script
     main()
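Note: run_1st_level() pairs each subject's bold runs with event files purely by list position, so the two processed.get() calls must return the same runs in the same order. A defensive sketch one could run before the fitting loop (check_pairing is a hypothetical helper, not part of the script above):

    import os

    def check_pairing(bold_runs, eventfiles):
        # Positional pairing assumes equal counts and a matching sort order;
        # fail loudly rather than silently mismatch runs and designs.
        assert len(bold_runs) == len(eventfiles), (
            f'{len(bold_runs)} bold runs vs {len(eventfiles)} event files')
        for b, e in zip(bold_runs, eventfiles):
            print('pairing', os.path.basename(b), '<->', os.path.basename(e))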
From 2eb87a1204f8010d9d431a71c79b0df5c4df45f1 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 1 Jun 2022 15:37:06 -0400
Subject: [PATCH 4/5] masks bold file prior to inputting into GLMsingle

---
 examples/example_bids_datasets.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 8083aaf..af2474c 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -126,13 +126,21 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
     # Loop over subjects
     for sub in subjects:
         # Grab all upsampled and smoothed runs
-        bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='smooth', return_type='filename')
+        bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename')
+        # Grab all mask files for each bold run
+        mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func')
+        # Merge
+        masked_runs = dict(zip(bold_runs, mask_runs))
         # Grab all GLMsingle-compatible event files
-        eventfiles = processed.get(subject=subjects, extension='.csv', suffix='glmsingle', return_type='filename')
+        eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename')
         # Append all runs for a single subject session
         # fMRI data
-        for run in bold_runs:
+        for run in masked_runs.keys():
             r = nib.load(run).get_fdata().astype(np.float32)
+            m = nib.load(masked_runs[run]).get_fdata().astype(np.float32)
+            # Set all values outside brain mask to 0
+            zeros = np.where(m==0)
+            r[zeros] = 0
             data.append(r)
         # design matrices
         for event in eventfiles:
@@ -156,7 +164,7 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
 
 def main():
     # Hardcoded file path to existing BIDS dataset
-    root = '/group/tuominen/EmoSal_BIDS'
+    root = '/Users/ramihamati/Documents/EmoSal_test'
     # Stimulus duration
     stimdur = 4
     # Desired repetition time
@@ -163,16 +171,14 @@ def main():
     tr_new = 1
     # Desired smoothing
     smooth_num = 6
     # Desired subjects
-    subjects = ['avl003', 'avl004', 'avl005', 'avl006', 'avl007', 'avl009', 'avl010',
-    'avl011', 'avl012', 'avl-013r', 'avl014', 'avl016', 'avl017', 'avl018', 'avl019',
-    'avl021', 'avl022', 'avl024', 'avl025', 'avl027', 'avl028', 'avl200', 'avl201']
+    subjects = ['avl001']
     # Execute functions
     ''' Can comment out intermediate steps if already complete. '''
     raw, processed, tr, num_slices, total_dur = grab_data(root)
-    make_new_df(root, raw, subjects, total_dur)
-    upsample_bold(processed, tr, tr_new, num_slices, subjects)
-    smooth_bold(processed, smooth_num, subjects)
+    #make_new_df(root, raw, subjects, total_dur)
+    #upsample_bold(processed, tr, tr_new, num_slices, subjects)
+    #smooth_bold(processed, smooth_num, subjects)
     run_1st_level(processed, stimdur, tr_new, subjects)
 if __name__ == "__main__":
     # execute only if run as a script
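Note: the masking added above works because of NumPy's indexing semantics: m is a 3D mask while r is a 4D run (x, y, z, time), and indexing a 4D array with the 3-tuple that np.where(m==0) returns selects whole voxel time series, so the assignment zeroes every out-of-mask voxel at all time points. A toy demonstration with made-up shapes:

    import numpy as np

    r = np.ones((2, 2, 2, 5), dtype=np.float32)  # toy 4D bold: x, y, z, time
    m = np.zeros((2, 2, 2), dtype=np.float32)    # toy 3D brain mask
    m[0, 0, 0] = 1                               # a single in-mask voxel

    zeros = np.where(m == 0)  # tuple of three index arrays (x, y, z)
    r[zeros] = 0              # zeroes the full time series of each masked voxel
    print(r[0, 0, 0])         # [1. 1. 1. 1. 1.] - in-mask voxel untouched
    print(r[1, 1, 1])         # [0. 0. 0. 0. 0.] - out-of-mask voxel cleared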
From 4513506688ba9bd5f0b6844341cba5e8b77cb14d Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 8 Jun 2022 22:35:13 -0400
Subject: [PATCH 5/5] filters for appropriate mask shape

---
 examples/example_bids_datasets.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index af2474c..7d03f72 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -129,13 +129,15 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
         bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename')
         # Grab all mask files for each bold run
         mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func')
+        mask_runsf = [m for m in mask_runs if 'res-2' in m]
         # Merge
-        masked_runs = dict(zip(bold_runs, mask_runs))
+        masked_runs = dict(zip(bold_runs, mask_runsf))
         # Grab all GLMsingle-compatible event files
         eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename')
         # Append all runs for a single subject session
         # fMRI data
         for run in masked_runs.keys():
+            print('Using:', os.path.dirname(masked_runs[run]), '\nto mask:', os.path.dirname(run))
             r = nib.load(run).get_fdata().astype(np.float32)
             m = nib.load(masked_runs[run]).get_fdata().astype(np.float32)
             # Set all values outside brain mask to 0
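Note: the res-2 filter above selects masks on the same grid as the smoothed runs, but dict(zip(...)) still pairs the two listings by position. Since nibabel reads image dimensions from the header without loading voxel data, an explicit shape check is cheap. A sketch under that assumption (shapes_match is a hypothetical helper, not part of the script):

    import nibabel as nib

    def shapes_match(bold_file, mask_file):
        # A mask fits a run when it matches the run's spatial grid,
        # i.e. the first three dimensions of the 4D bold image.
        return nib.load(mask_file).shape == nib.load(bold_file).shape[:3]

    # e.g.: masked_runs = {b: m for b, m in zip(bold_runs, mask_runsf)
    #                      if shapes_match(b, m)}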