-
Notifications
You must be signed in to change notification settings - Fork 46
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
example illustrating data preprocessing of fMRIprep outputs for use i… #64
base: main
Are you sure you want to change the base?
Changes from all commits
70858d0
f617581
1331049
2eb87a1
4513506
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,187 @@ | ||
#!/group/tuominen/anaconda3/bin/python3 | ||
# Import libraries | ||
|
||
import os | ||
import numpy as np | ||
import nibabel as nib | ||
import pandas as pd | ||
from bids import BIDSLayout | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I suspect you will need to add a requirements.txt to mention pybids need to be installed |
||
from glmsingle.glmsingle import GLM_single | ||
|
||
def grab_data(root): | ||
''' Use pybids to easily iter over dataset generated using fMRIprep. ''' | ||
# Load data | ||
raw = BIDSLayout(os.path.join(root,'rawdata')) | ||
processed = BIDSLayout(os.path.join(root,'derivatives','iter4')) | ||
# Print summary data | ||
print(raw) | ||
print(processed) | ||
# Get data per run | ||
bold_info = raw.get(extension='.nii.gz', suffix='bold', return_type='filename')[0] | ||
metadata = raw.get_metadata(bold_info) | ||
slicedata = nib.load(bold_info).get_fdata().astype(np.float32) | ||
''' To make an event file GLMsingle compatible, calculate total | ||
acquisition time rounded to nearest second. Total time will represent | ||
number of rows in new event file. ''' | ||
# Print run data | ||
tr = metadata['RepetitionTime'] | ||
num_slices = slicedata.shape[3] | ||
total_dur = round(tr * num_slices) | ||
print('TR:', tr, '\n Number of slices acquired:', num_slices, | ||
'\n Total run duration:', total_dur) | ||
return raw, processed, tr, num_slices, total_dur | ||
|
||
def make_new_df(root, raw, subjects, total_dur): | ||
''' Make an empty dataframe, # of rows is equal to total run duration (seconds), | ||
because we will upsample the TR to 1s to reduce jitter. # of columns equal to | ||
total unique event types in .tsv file. In this case 3 unique event types exist, | ||
as we ignore the event type representing intertrial intervals (ITIs). ''' | ||
# Grab event files | ||
eventfiles = raw.get(subject=subjects, extension='.tsv', suffix='events', return_type='filename') | ||
# Load into pandas | ||
for eventfile in eventfiles: | ||
design = pd.read_csv(eventfile, sep='\t') | ||
# Parse each event type, as indicated by value, where each integer represents a different event type | ||
CSplus_shock = design.loc[design['value'] == 3] | ||
CSplus = design.loc[design['value'] == 1] | ||
CSminus = design.loc[design['value'] == 2] | ||
# Make new df | ||
new_design = pd.DataFrame(columns = ['TR', 'CSplusshock','CSplus','CSminus']) | ||
new_design['TR'] = [i for i in range(1, (total_dur+1))] | ||
# For each stimulus type, mark the onset of the stimulus with 1. All other values should be 0 | ||
for cs in [CSplus_shock, CSplus, CSminus]: | ||
for i in range(0, len(cs.index)): | ||
tr_start = int(round(cs.iloc[i,0])) | ||
if cs is CSplus_shock: | ||
new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0] | ||
elif cs is CSplus: | ||
new_design.iloc[tr_start, :] = [tr_start, 0, 1, 0] | ||
elif cs is CSminus: | ||
new_design.iloc[tr_start, :] = [tr_start, 0, 0, 1] | ||
else: | ||
new_design.iloc[tr_start, :] = [tr_start, 0, 0, 0] | ||
# Fill NaNs with 0s | ||
new_design.fillna(0, inplace=True) | ||
# Give the event file a new name | ||
new_name = raw.parse_file_entities(eventfile) | ||
new_name['suffix'] = 'events_glmsingle' | ||
pattern = 'sub-{}_ses-{}_task-{}_run-{}_{}.csv' | ||
# Save the event file in derivatives, exclude TR column | ||
new_design.to_csv(os.path.join(root, 'derivatives', 'iter4', | ||
'sub-'+new_name['subject'], 'ses-'+new_name['session'], 'func', | ||
pattern.format(new_name['subject'], | ||
new_name['session'], new_name['task'], new_name['run'], | ||
new_name['suffix'])), columns=['CSplusshock','CSplus','CSminus'], | ||
index=False, header=False) | ||
Comment on lines
+67
to
+75
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You probably : |
||
print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run']) | ||
def upsample_bold(processed, tr, tr_new, num_slices, subjects): | ||
''' Upsample task-based fMRI data to 1s using pyslicetime scripts: | ||
https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py. | ||
Output will have suffix _stc appended.''' | ||
from slicetime.nipype_interface import SliceTime | ||
# Grab all preprocessed runs | ||
bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='bold', return_type='filename') | ||
# Filter for resolution - here we exclude res-2 | ||
bold_runsf = [b for b in bold_runs if 'res-2' not in b] | ||
# Import parameters | ||
tr_old = tr | ||
tr_new = tr_new | ||
num_slices = num_slices | ||
# sliceorder needs to be 1-based (see fakeout below) | ||
slicetimes = np.flip(np.arange(0, tr_old, tr_old/num_slices, dtype=object)).tolist() | ||
# Iter over runs | ||
for run in bold_runsf: | ||
print('Upsampling:', run) | ||
st = SliceTime() | ||
st.inputs.in_file = run | ||
st.inputs.tr_old = tr | ||
st.inputs.tr_new = tr_new | ||
st.inputs.slicetimes = slicetimes | ||
res = st.run() | ||
|
||
def smooth_bold(processed, smooth_num, subjects): | ||
''' Smooth data prior to first-level analysis using FSL in nipype. | ||
Output will have suffix _smooth appended.''' | ||
from nipype.interfaces.fsl import Smooth | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe I missed it but you did not import nipype, right? also not a python expert, but do we have to go through nipype FSL interface to smooth the data? Doesn't that pre supposes that people have FSL installed on their system? |
||
# Grab all upsampled runs | ||
bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='stc', return_type='filename') | ||
# Smooth data before inputting to GLMsingle | ||
for run in bold_runs: | ||
print('Smoothing:', os.path.basename(run)) | ||
path = os.path.dirname(run) | ||
os.chdir(path) | ||
sm = Smooth() | ||
sm.inputs.output_type = 'NIFTI_GZ' | ||
sm.inputs.in_file = run | ||
sm.inputs.fwhm = smooth_num | ||
res = sm.run() | ||
|
||
def run_1st_level(processed, stimdur, tr_new, subjects): | ||
''' Input several runs per subject to perform cross-validation | ||
in GLMdenoise (step 3 of GLMsingle). Imaging data and event data | ||
should be appended into separate lists. ''' | ||
# Get all subjects | ||
data = [] | ||
design = [] | ||
# Loop over subjects | ||
for sub in subjects: | ||
# Grab all upsampled and smoothed runs | ||
bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename') | ||
# Grab all mask files for each bold run | ||
mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func') | ||
mask_runsf = [m for m in mask_runs if 'res-2' in m] | ||
# Merge | ||
masked_runs = dict(zip(bold_runs, mask_runsf)) | ||
# Grab all GLMsingle-compatible event files | ||
eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename') | ||
# Append all runs for a single subject session | ||
# fMRI data | ||
for run in masked_runs.keys(): | ||
print('Using:', os.path.dirname(masked_runs[run]), '\nto mask:', os.path.dirname(run)) | ||
r = nib.load(run).get_fdata().astype(np.float32) | ||
m = nib.load(masked_runs[run]).get_fdata().astype(np.float32) | ||
# Set all values outside brain mask to 0 | ||
zeros = np.where(m==0) | ||
r[zeros] = 0 | ||
data.append(r) | ||
# design matrices | ||
for event in eventfiles: | ||
ev = pd.read_csv(event, header=None).to_numpy(dtype='float32') | ||
design.append(ev) | ||
# Initialize model | ||
gs = GLM_single() | ||
# Name output folders | ||
metadata = processed.parse_file_entities(bold_runs[0]) | ||
outdir = os.path.join(os.path.dirname(bold_runs[0]), metadata['task'] + '_concat_glmsingle') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same comment as above to have BIDS friendly filenames |
||
# Run GLMsingle | ||
results = gs.fit( | ||
design, | ||
data, | ||
stimdur, | ||
tr_new, | ||
outputdir=outdir) | ||
# Clear data | ||
data.clear() | ||
design.clear() | ||
|
||
def main(): | ||
# Hardcoded file path to existing BIDS dataset | ||
root = '/Users/ramihamati/Documents/EmoSal_test' | ||
# Stimulus duration | ||
stimdur = 4 | ||
# Desired repetition time | ||
tr_new = 1 | ||
# Desired smoothing | ||
smooth_num = 6 | ||
# Desired subjects | ||
subjects = ['avl001'] | ||
# Execute functions | ||
''' Can comment out intermediate steps if already complete. ''' | ||
raw, processed, tr, num_slices, total_dur = grab_data(root) | ||
#make_new_df(root, raw, subjects, total_dur) | ||
#upsample_bold(processed, tr, tr_new, num_slices, subjects) | ||
#smooth_bold(processed, smooth_num, subjects) | ||
run_1st_level(processed, stimdur, tr_new, subjects) | ||
if __name__ == "__main__": | ||
# execute only if run as a script | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
probably need to change this to make the code more portable