From 70858d028d7c60c605bce2837dc2b3759522984f Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Fri, 13 May 2022 12:47:27 -0400
Subject: [PATCH 1/5] example illustrating data preprocessing of fMRIprep
 outputs for use in GLMsingle

---
 examples/example_bids_datasets.py | 174 ++++++++++++++++++++++++++++++
 1 file changed, 174 insertions(+)
 create mode 100755 examples/example_bids_datasets.py

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
new file mode 100755
index 0000000..646f385
--- /dev/null
+++ b/examples/example_bids_datasets.py
@@ -0,0 +1,174 @@
+#!/home/rami/anaconda3/bin/python3
+# Import libraries
+
+import os
+import numpy as np
+import nibabel as nib
+import pandas as pd
+from bids import BIDSLayout
+from glmsingle.glmsingle import GLM_single
+
+def grab_data(root):
+    ''' Use pybids to easily iter over dataset generated using fMRIprep. '''
+    # Load data
+    raw = BIDSLayout(os.path.join(root,'rawdata'))
+    processed = BIDSLayout(os.path.join(root,'derivatives','iter4'))
+    # Print summary data
+    print(raw)
+    print(processed)
+    # Get data per run
+    bold_info = raw.get(extension='.nii.gz', suffix='bold', return_type='filename')[0]
+    metadata = raw.get_metadata(bold_info)
+    slicedata = nib.load(bold_info).get_fdata().astype(np.float32)
+    ''' To make an event file GLMsingle compatible, calculate total
+    acquisition time rounded to nearest second. Total time will represent
+    number of rows in new event file. '''
+    # Print run data
+    tr = metadata['RepetitionTime']
+    num_slices = slicedata.shape[3]
+    total_dur = round(tr * num_slices)
+    print('TR:', tr, '\n Number of slices acquired:', num_slices,
+          '\n Total run duration:', total_dur)
+    return raw, processed, tr, num_slices, total_dur
+
+def make_new_df(root, raw, total_dur):
+    ''' Make an empty dataframe, # of rows is equal to total run duration (seconds),
+    because we will upsample the TR to 1s to reduce jitter. # of columns equal to
+    total unique event types in .tsv file. In this case 3 unique event types exist,
+    as we ignore the event type representing intertrial intervals (ITIs). '''
+    # Grab event files
+    eventfiles = raw.get(extension='.tsv', suffix='events', return_type='filename')
+    # Load into pandas
+    for eventfile in eventfiles:
+        design = pd.read_csv(eventfile, sep='\t')
+        # Parse each event type, as indicated by value, where each integer represents a different event type
+        CSplus_shock = design.loc[design['value'] == 3]
+        CSplus = design.loc[design['value'] == 1]
+        CSminus = design.loc[design['value'] == 2]
+        # Make new df
+        new_design = pd.DataFrame(columns = ['TR', 'CSplusshock','CSplus','CSminus'])
+        new_design['TR'] = [i for i in range(1, (total_dur+1))]
+        # For each stimulus type, mark the onset of the stimulus with 1. All other values should be 0
+        for cs in [CSplus_shock, CSplus, CSminus]:
+            for i in range(0, len(cs.index)):
+                tr_start = round(cs.iloc[i,0])
+                if cs is CSplus_shock:
+                    new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0]
+                elif cs is CSplus:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 1, 0]
+                elif cs is CSminus:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 0, 1]
+                else:
+                    new_design.iloc[tr_start, :] = [tr_start, 0, 0, 0]
+        # Fill NaNs with 0s
+        new_design.fillna(0, inplace=True)
+        # Give the event file a new name
+        new_name = raw.parse_file_entities(eventfile)
+        new_name['suffix'] = 'events_glmsingle'
+        pattern = 'sub-{}_ses-{}_task-{}_run-{}_{}.csv'
+        # Save the event file in derivatives, exclude TR column
+        new_design.to_csv(os.path.join(root, 'derivatives', 'iter4',
+            'sub-'+new_name['subject'], 'ses-'+new_name['session'], 'func',
+            pattern.format(new_name['subject'],
+            new_name['session'], new_name['task'], new_name['run'],
+            new_name['suffix'])), columns=['CSplusshock','CSplus','CSminus'],
+            index=False, header=False)
+        print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run'])
+def upsample_bold(processed, tr, tr_new, num_slices):
+    ''' Upsample task-based fMRI data to 1s using pyslicetime scripts:
+    https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py.
+    Output will have suffix _stc appended.'''
+    from slicetime.nipype_interface import SliceTime
+    # Grab all preprocessed runs
+    bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    # Import parameters
+    tr_old = tr
+    tr_new = tr_new
+    num_slices = num_slices
+    # sliceorder needs to be 1-based (see fakeout below)
+    slicetimes = np.flip(np.arange(0, tr_old, tr_old/num_slices, dtype=object)).tolist()
+    # Iter over runs
+    for run in bold_runs:
+        print('Upsampling:', run)
+        st = SliceTime()
+        st.inputs.in_file = run
+        st.inputs.tr_old = tr
+        st.inputs.tr_new = tr_new
+        st.inputs.slicetimes = slicetimes
+        res = st.run()
+
+def smooth_bold(processed, smooth_num):
+    ''' Smooth data prior to first-level analysis using FSL in nipype.
+    Output will have suffix _smooth appended.'''
+    from nipype.interfaces.fsl import Smooth
+    # Grab all upsampled runs
+    bold_runs = processed.get(extension='.nii.gz', suffix='stc', return_type='filename')
+    # Smooth data before inputting to GLMsingle
+    for run in bold_runs:
+        print('Smoothing:', os.path.basename(run))
+        path = os.path.dirname(run)
+        os.chdir(path)
+        sm = Smooth()
+        sm.inputs.output_type = 'NIFTI_GZ'
+        sm.inputs.in_file = run
+        sm.inputs.fwhm = smooth_num
+        res = sm.run()
+
+def run_1st_level(processed, stimdur, tr_new):
+    ''' Input several runs per subject to perform cross-validation
+    in GLMdenoise (step 3 of GLMsingle). Imaging data and event data
+    should be appended into separate lists. '''
+    # Get all subjects
+    subjects = processed.get(return_type='id', target='subject')
+    data = []
+    design = []
+    # Loop over subjects
+    for sub in subjects:
+        # Grab all upsampled and smoothed runs
+        bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='smooth', return_type='filename')
+        # Grab all GLMsingle-compatible event files
+        eventfiles = processed.get(subject=subjects, extension='.csv', suffix='glmsingle', return_type='filename')
+        # Append all runs for a single subject session
+        # fMRI data
+        for run in bold_runs:
+            r = nib.load(run).get_fdata().astype(np.float32)
+            data.append(r)
+        # design matrices
+        for event in eventfiles:
+            ev = pd.read_csv(event, header=None).to_numpy(dtype='float32')
+            design.append(ev)
+        # Initialize model
+        gs = GLM_single()
+        # Name output folders
+        metadata = processed.parse_file_entities(bold_runs[0])
+        outdir = os.path.join(os.path.dirname(bold_runs[0]), metadata['task'] + '_concat_glmsingle')
+        # Run GLMsingle
+        results = gs.fit(
+            design,
+            data,
+            stimdur,
+            tr_new,
+            outputdir=outdir)
+        # Clear data
+        data.clear()
+        design.clear()
+
+def main():
+    # Hardcoded file path to existing BIDS dataset
+    root = '/home/rami/test/'
+    # Stimulus duration
+    stimdur = 4
+    # Desired repetition time
+    tr_new = 1
+    # Desired smoothing
+    smooth_num = 6
+    # Execute functions
+    ''' Can comment out intermediate steps if already complete. '''
+    raw, processed, tr, num_slices, total_dur = grab_data(root)
+    make_new_df(root, raw, total_dur)
+    upsample_bold(processed, tr, tr_new, num_slices)
+    smooth_bold(processed, smooth_num)
+    run_1st_level(processed, stimdur, tr_new)
+if __name__ == "__main__":
+    # execute only if run as a script
+    main()
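Note: the design-matrix convention that make_new_df() builds toward is a time-by-conditions matrix per run, with a 1 in the row of each stimulus onset and 0 everywhere else, saved as a headerless CSV. A minimal self-contained sketch of the same idea (the toy onsets, condition codes, and run length below are made up for illustration, not values from this dataset):

    import numpy as np
    import pandas as pd

    # Toy events table in the spirit of the raw .tsv files:
    # onset in seconds, integer code identifying the event type
    events = pd.DataFrame({'onset': [12.3, 40.8, 77.1], 'value': [1, 2, 3]})
    total_dur = 100  # run length in seconds = rows after upsampling to a 1 s TR

    design = np.zeros((total_dur, 3), dtype=np.float32)
    for onset, value in zip(events['onset'], events['value']):
        design[int(round(onset)), value - 1] = 1  # one column per condition

The rounding mirrors the script's snapping of onsets to the nearest 1 s bin; GLMsingle then receives one such matrix per run alongside the 4D data.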
From f617581be5a13b43ebdd580f8726cd0c2d9466a9 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 18 May 2022 10:32:23 -0400
Subject: [PATCH 2/5] added filter if multiple resolutions exist

---
 examples/example_bids_datasets.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 646f385..305fa15 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -81,6 +81,8 @@ def upsample_bold(processed, tr, tr_new, num_slices):
     from slicetime.nipype_interface import SliceTime
     # Grab all preprocessed runs
     bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    # Filter for resolution - here we exclude res-2
+    bold_runsf = [b for b in bold_runs if 'res-2' not in b]
     # Import parameters
     tr_old = tr
     tr_new = tr_new
@@ -88,7 +90,7 @@ def upsample_bold(processed, tr, tr_new, num_slices):
     # sliceorder needs to be 1-based (see fakeout below)
     slicetimes = np.flip(np.arange(0, tr_old, tr_old/num_slices, dtype=object)).tolist()
     # Iter over runs
-    for run in bold_runs:
+    for run in bold_runsf:
         print('Upsampling:', run)
         st = SliceTime()
         st.inputs.in_file = run
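Note: the list comprehension above filters on the raw filename, so 'res-2' anywhere in the path would exclude a run. A slightly stricter sketch that matches the string only as a delimited BIDS res- entity (the filenames here are hypothetical, just to make the snippet runnable):

    import re

    bold_runs = ['sub-01_task-fear_res-1_desc-preproc_bold.nii.gz',
                 'sub-01_task-fear_res-2_desc-preproc_bold.nii.gz']

    def has_entity(filename, key, value):
        # Match key-value only as a whole entity, e.g. '_res-2_' or '_res-2.'
        return re.search(rf'[_/]{key}-{value}(?=[_.])', filename) is not None

    bold_runsf = [b for b in bold_runs if not has_entity(b, 'res', '2')]
    print(bold_runsf)  # keeps only the res-1 run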
From 13310498df50a40732467a439de023803b868710 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 18 May 2022 11:47:06 -0400
Subject: [PATCH 3/5] hardcoded subjects list

---
 examples/example_bids_datasets.py | 33 +++++++++++++++++--------------
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 305fa15..8083aaf 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -1,4 +1,4 @@
-#!/home/rami/anaconda3/bin/python3
+#!/group/tuominen/anaconda3/bin/python3
 # Import libraries
 
 import os
@@ -31,13 +31,13 @@ def grab_data(root):
           '\n Total run duration:', total_dur)
     return raw, processed, tr, num_slices, total_dur
 
-def make_new_df(root, raw, total_dur):
+def make_new_df(root, raw, subjects, total_dur):
     ''' Make an empty dataframe, # of rows is equal to total run duration (seconds),
     because we will upsample the TR to 1s to reduce jitter. # of columns equal to
     total unique event types in .tsv file. In this case 3 unique event types exist,
     as we ignore the event type representing intertrial intervals (ITIs). '''
     # Grab event files
-    eventfiles = raw.get(extension='.tsv', suffix='events', return_type='filename')
+    eventfiles = raw.get(subject=subjects, extension='.tsv', suffix='events', return_type='filename')
     # Load into pandas
     for eventfile in eventfiles:
         design = pd.read_csv(eventfile, sep='\t')
@@ -51,7 +51,7 @@ def make_new_df(root, raw, total_dur):
         # For each stimulus type, mark the onset of the stimulus with 1. All other values should be 0
         for cs in [CSplus_shock, CSplus, CSminus]:
             for i in range(0, len(cs.index)):
-                tr_start = round(cs.iloc[i,0])
+                tr_start = int(round(cs.iloc[i,0]))
                 if cs is CSplus_shock:
                     new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0]
                 elif cs is CSplus:
@@ -74,13 +74,13 @@ def make_new_df(root, raw, total_dur):
             new_name['suffix'])), columns=['CSplusshock','CSplus','CSminus'],
             index=False, header=False)
         print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run'])
-def upsample_bold(processed, tr, tr_new, num_slices):
+def upsample_bold(processed, tr, tr_new, num_slices, subjects):
     ''' Upsample task-based fMRI data to 1s using pyslicetime scripts:
     https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py.
     Output will have suffix _stc appended.'''
     from slicetime.nipype_interface import SliceTime
     # Grab all preprocessed runs
-    bold_runs = processed.get(extension='.nii.gz', suffix='bold', return_type='filename')
+    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='bold', return_type='filename')
     # Filter for resolution - here we exclude res-2
     bold_runsf = [b for b in bold_runs if 'res-2' not in b]
     # Import parameters
     tr_old = tr
@@ -99,12 +99,12 @@ def upsample_bold(processed, tr, tr_new, num_slices):
         st.inputs.slicetimes = slicetimes
         res = st.run()
 
-def smooth_bold(processed, smooth_num):
+def smooth_bold(processed, smooth_num, subjects):
     ''' Smooth data prior to first-level analysis using FSL in nipype.
     Output will have suffix _smooth appended.'''
     from nipype.interfaces.fsl import Smooth
     # Grab all upsampled runs
-    bold_runs = processed.get(extension='.nii.gz', suffix='stc', return_type='filename')
+    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='stc', return_type='filename')
     # Smooth data before inputting to GLMsingle
     for run in bold_runs:
         print('Smoothing:', os.path.basename(run))
@@ -116,12 +116,11 @@ def smooth_bold(processed, smooth_num):
         sm.inputs.fwhm = smooth_num
         res = sm.run()
 
-def run_1st_level(processed, stimdur, tr_new):
+def run_1st_level(processed, stimdur, tr_new, subjects):
     ''' Input several runs per subject to perform cross-validation
     in GLMdenoise (step 3 of GLMsingle). Imaging data and event data
     should be appended into separate lists. '''
     # Get all subjects
-    subjects = processed.get(return_type='id', target='subject')
     data = []
     design = []
     # Loop over subjects
@@ -157,20 +156,24 @@ def run_1st_level(processed, stimdur, tr_new):
 
 def main():
     # Hardcoded file path to existing BIDS dataset
-    root = '/home/rami/test/'
+    root = '/group/tuominen/EmoSal_BIDS'
     # Stimulus duration
     stimdur = 4
     # Desired repetition time
     tr_new = 1
     # Desired smoothing
     smooth_num = 6
+    # Desired subjects
+    subjects = ['avl003', 'avl004', 'avl005', 'avl006', 'avl007', 'avl009', 'avl010',
+    'avl011', 'avl012', 'avl-013r', 'avl014', 'avl016', 'avl017', 'avl018', 'avl019',
+    'avl021', 'avl022', 'avl024', 'avl025', 'avl027', 'avl028', 'avl200', 'avl201']
     # Execute functions
     ''' Can comment out intermediate steps if already complete. '''
     raw, processed, tr, num_slices, total_dur = grab_data(root)
-    make_new_df(root, raw, total_dur)
-    upsample_bold(processed, tr, tr_new, num_slices)
-    smooth_bold(processed, smooth_num)
-    run_1st_level(processed, stimdur, tr_new)
+    make_new_df(root, raw, subjects, total_dur)
+    upsample_bold(processed, tr, tr_new, num_slices, subjects)
+    smooth_bold(processed, smooth_num, subjects)
+    run_1st_level(processed, stimdur, tr_new, subjects)
 if __name__ == "__main__":
     # execute only if run as a script
     main()
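Note: run_1st_level() pairs each subject's bold runs with event files purely by list position, so the two processed.get() calls must return the same runs in the same order. A defensive sketch one could run before the fitting loop (check_pairing is a hypothetical helper, not part of the script above):

    import os

    def check_pairing(bold_runs, eventfiles):
        # Positional pairing assumes equal counts and a matching sort order;
        # fail loudly rather than silently mismatch runs and designs.
        assert len(bold_runs) == len(eventfiles), (
            f'{len(bold_runs)} bold runs vs {len(eventfiles)} event files')
        for b, e in zip(bold_runs, eventfiles):
            print('pairing', os.path.basename(b), '<->', os.path.basename(e))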
From 2eb87a1204f8010d9d431a71c79b0df5c4df45f1 Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 1 Jun 2022 15:37:06 -0400
Subject: [PATCH 4/5] masks bold file prior to inputting into GLMsingle

---
 examples/example_bids_datasets.py | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index 8083aaf..af2474c 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -126,13 +126,21 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
     # Loop over subjects
     for sub in subjects:
         # Grab all upsampled and smoothed runs
-        bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='smooth', return_type='filename')
+        bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename')
+        # Grab all mask files for each bold run
+        mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func')
+        # Merge
+        masked_runs = dict(zip(bold_runs, mask_runs))
         # Grab all GLMsingle-compatible event files
-        eventfiles = processed.get(subject=subjects, extension='.csv', suffix='glmsingle', return_type='filename')
+        eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename')
         # Append all runs for a single subject session
         # fMRI data
-        for run in bold_runs:
+        for run in masked_runs.keys():
             r = nib.load(run).get_fdata().astype(np.float32)
+            m = nib.load(masked_runs[run]).get_fdata().astype(np.float32)
+            # Set all values outside brain mask to 0
+            zeros = np.where(m==0)
+            r[zeros] = 0
             data.append(r)
         # design matrices
         for event in eventfiles:
@@ -156,7 +164,7 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
 
 def main():
     # Hardcoded file path to existing BIDS dataset
-    root = '/group/tuominen/EmoSal_BIDS'
+    root = '/Users/ramihamati/Documents/EmoSal_test'
     # Stimulus duration
     stimdur = 4
     # Desired repetition time
@@ -163,16 +171,14 @@ def main():
     tr_new = 1
     # Desired smoothing
     smooth_num = 6
     # Desired subjects
-    subjects = ['avl003', 'avl004', 'avl005', 'avl006', 'avl007', 'avl009', 'avl010',
-    'avl011', 'avl012', 'avl-013r', 'avl014', 'avl016', 'avl017', 'avl018', 'avl019',
-    'avl021', 'avl022', 'avl024', 'avl025', 'avl027', 'avl028', 'avl200', 'avl201']
+    subjects = ['avl001']
     # Execute functions
     ''' Can comment out intermediate steps if already complete. '''
     raw, processed, tr, num_slices, total_dur = grab_data(root)
-    make_new_df(root, raw, subjects, total_dur)
-    upsample_bold(processed, tr, tr_new, num_slices, subjects)
-    smooth_bold(processed, smooth_num, subjects)
+    #make_new_df(root, raw, subjects, total_dur)
+    #upsample_bold(processed, tr, tr_new, num_slices, subjects)
+    #smooth_bold(processed, smooth_num, subjects)
     run_1st_level(processed, stimdur, tr_new, subjects)
 if __name__ == "__main__":
     # execute only if run as a script
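Note: the masking added above works because of NumPy's indexing semantics: m is a 3D mask while r is a 4D run (x, y, z, time), and indexing a 4D array with the 3-tuple that np.where(m==0) returns selects whole voxel time series, so the assignment zeroes every out-of-mask voxel at all time points. A toy demonstration with made-up shapes:

    import numpy as np

    r = np.ones((2, 2, 2, 5), dtype=np.float32)  # toy 4D bold: x, y, z, time
    m = np.zeros((2, 2, 2), dtype=np.float32)    # toy 3D brain mask
    m[0, 0, 0] = 1                               # a single in-mask voxel

    zeros = np.where(m == 0)  # tuple of three index arrays (x, y, z)
    r[zeros] = 0              # zeroes the full time series of each masked voxel
    print(r[0, 0, 0])         # [1. 1. 1. 1. 1.] - in-mask voxel untouched
    print(r[1, 1, 1])         # [0. 0. 0. 0. 0.] - out-of-mask voxel cleared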
From 4513506688ba9bd5f0b6844341cba5e8b77cb14d Mon Sep 17 00:00:00 2001
From: ramroomh
Date: Wed, 8 Jun 2022 22:35:13 -0400
Subject: [PATCH 5/5] filters for appropriate mask shape

---
 examples/example_bids_datasets.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/examples/example_bids_datasets.py b/examples/example_bids_datasets.py
index af2474c..7d03f72 100755
--- a/examples/example_bids_datasets.py
+++ b/examples/example_bids_datasets.py
@@ -129,13 +129,15 @@ def run_1st_level(processed, stimdur, tr_new, subjects):
         bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename')
         # Grab all mask files for each bold run
         mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func')
+        mask_runsf = [m for m in mask_runs if 'res-2' in m]
         # Merge
-        masked_runs = dict(zip(bold_runs, mask_runs))
+        masked_runs = dict(zip(bold_runs, mask_runsf))
         # Grab all GLMsingle-compatible event files
         eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename')
         # Append all runs for a single subject session
         # fMRI data
         for run in masked_runs.keys():
+            print('Using:', os.path.dirname(masked_runs[run]), '\nto mask:', os.path.dirname(run))
             r = nib.load(run).get_fdata().astype(np.float32)
             m = nib.load(masked_runs[run]).get_fdata().astype(np.float32)
             # Set all values outside brain mask to 0
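Note: the res-2 filter above selects masks on the same grid as the smoothed runs, but dict(zip(...)) still pairs the two listings by position. Since nibabel reads image dimensions from the header without loading voxel data, an explicit shape check is cheap. A sketch under that assumption (shapes_match is a hypothetical helper, not part of the script):

    import nibabel as nib

    def shapes_match(bold_file, mask_file):
        # A mask fits a run when it matches the run's spatial grid,
        # i.e. the first three dimensions of the 4D bold image.
        return nib.load(mask_file).shape == nib.load(bold_file).shape[:3]

    # e.g.: masked_runs = {b: m for b, m in zip(bold_runs, mask_runsf)
    #                      if shapes_match(b, m)}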