example illustrating data preprocessing of fMRIprep outputs for use i… #64

Open · wants to merge 5 commits into base: main
187 changes: 187 additions & 0 deletions examples/example_bids_datasets.py
@@ -0,0 +1,187 @@
#!/group/tuominen/anaconda3/bin/python3
Contributor: probably need to change this to make the code more portable
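One portable option (an editorial suggestion, not something the PR author has committed to) is the standard env shebang, which resolves the interpreter from the user's PATH:

#!/usr/bin/env python3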

# Import libraries

import os
import numpy as np
import nibabel as nib
import pandas as pd
from bids import BIDSLayout
Contributor: I suspect you will need to add a requirements.txt to mention that pybids needs to be installed.
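For reference, a minimal requirements.txt covering the imports used in this example might look like the following (the package list is inferred from the imports; nothing here is pinned by the PR):

pybids
nibabel
numpy
pandas
nipype
# glmsingle itself is assumed to be installed separately,
# e.g. from https://github.com/cvnlab/GLMsingle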

from glmsingle.glmsingle import GLM_single

def grab_data(root):
    ''' Use pybids to iterate over a dataset preprocessed with fMRIprep. '''
    # Load data
    raw = BIDSLayout(os.path.join(root, 'rawdata'))
    processed = BIDSLayout(os.path.join(root, 'derivatives', 'iter4'))
    # Print summary data
    print(raw)
    print(processed)
    # Get data per run
    bold_info = raw.get(extension='.nii.gz', suffix='bold', return_type='filename')[0]
    metadata = raw.get_metadata(bold_info)
    slicedata = nib.load(bold_info).get_fdata().astype(np.float32)
    ''' To make an event file GLMsingle-compatible, calculate the total
    acquisition time rounded to the nearest second. The total time will be
    the number of rows in the new event file. '''
    # Print run data
    tr = metadata['RepetitionTime']
    # Axis 2 holds slices per volume (needed for slice timing);
    # axis 3 holds volumes (timepoints), which determines run duration
    num_slices = slicedata.shape[2]
    num_vols = slicedata.shape[3]
    total_dur = round(tr * num_vols)
    print('TR:', tr, '\nSlices per volume:', num_slices,
          '\nNumber of volumes acquired:', num_vols,
          '\nTotal run duration:', total_dur)
    return raw, processed, tr, num_slices, total_dur
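Hypothetical usage (the path and numbers are illustrative):

raw, processed, tr, num_slices, total_dur = grab_data('/path/to/bids_root')
# e.g. with TR = 2.0 s and 150 volumes: total_dur = round(2.0 * 150) = 300,
# i.e. 300 rows in the upsampled 1 Hz design matrix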

def make_new_df(root, raw, subjects, total_dur):
    ''' Make an empty dataframe whose number of rows equals the total run
    duration in seconds, because we upsample the TR to 1 s to reduce jitter.
    The number of columns equals the number of unique event types in the
    .tsv file. Here there are 3 unique event types, as we ignore the event
    type representing intertrial intervals (ITIs). '''
    # Grab event files
    eventfiles = raw.get(subject=subjects, extension='.tsv', suffix='events', return_type='filename')
    # Load into pandas
    for eventfile in eventfiles:
        design = pd.read_csv(eventfile, sep='\t')
        # Parse each event type; each integer in 'value' codes a different event type
        CSplus_shock = design.loc[design['value'] == 3]
        CSplus = design.loc[design['value'] == 1]
        CSminus = design.loc[design['value'] == 2]
        # Make new df
        new_design = pd.DataFrame(columns=['TR', 'CSplusshock', 'CSplus', 'CSminus'])
        new_design['TR'] = list(range(1, total_dur + 1))
        # For each stimulus type, mark the onset of the stimulus with 1;
        # all other values stay 0
        for cs in [CSplus_shock, CSplus, CSminus]:
            for i in range(len(cs.index)):
                # Onset is the first column, rounded to the nearest second
                tr_start = int(round(cs.iloc[i, 0]))
                if cs is CSplus_shock:
                    new_design.iloc[tr_start, :] = [tr_start, 1, 0, 0]
                elif cs is CSplus:
                    new_design.iloc[tr_start, :] = [tr_start, 0, 1, 0]
                elif cs is CSminus:
                    new_design.iloc[tr_start, :] = [tr_start, 0, 0, 1]
        # Fill NaNs with 0s
        new_design.fillna(0, inplace=True)
        # Give the event file a new name
        new_name = raw.parse_file_entities(eventfile)
        new_name['suffix'] = 'events_glmsingle'
        pattern = 'sub-{}_ses-{}_task-{}_run-{}_{}.csv'
        # Save the event file in derivatives, excluding the TR column
        new_design.to_csv(os.path.join(root, 'derivatives', 'iter4',
            'sub-' + new_name['subject'], 'ses-' + new_name['session'], 'func',
            pattern.format(new_name['subject'], new_name['session'],
                           new_name['task'], new_name['run'], new_name['suffix'])),
            columns=['CSplusshock', 'CSplus', 'CSminus'],
            index=False, header=False)
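To make the onset marking concrete (values hypothetical):

# An event with onset 12.3 s and value == 1 (CS+) gives
# tr_start = int(round(12.3)) = 12, so row 12 of the 1 Hz design
# matrix becomes [12, 0, 1, 0]; rows with no onset stay all zeros
# after fillna(0)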
Contributor (on lines +67 to +75):
You probably:
A) want to use the pybids build_path method (https://bids-standard.github.io/pybids/generated/bids.layout.writing.html#module-bids.layout.writing), which should happily accept the dict of entities you have just created.
B) want to give a BIDS-friendly name to your output file: keep events as the suffix but add a desc-glmsingle entity.
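A minimal sketch of that suggestion; the entity values and the path pattern below are illustrative, not taken from this PR:

from bids.layout.writing import build_path

# Entities as returned by parse_file_entities, plus the reviewer's
# suggested desc-glmsingle entity (values are hypothetical)
entities = {'subject': 'avl001', 'session': '01', 'task': 'mytask',
            'run': 1, 'desc': 'glmsingle', 'suffix': 'events',
            'extension': '.csv'}
pattern = ('sub-{subject}/ses-{session}/func/sub-{subject}_ses-{session}'
           '_task-{task}_run-{run}_desc-{desc}_{suffix}{extension}')
print(build_path(entities, pattern))
# sub-avl001/ses-01/func/sub-avl001_ses-01_task-mytask_run-1_desc-glmsingle_events.csv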

        print('glmsingle event file saved:', new_name['subject'], 'run:', new_name['run'])
def upsample_bold(processed, tr, tr_new, num_slices, subjects):
    ''' Upsample task-based fMRI data to 1 s using the pyslicetime scripts:
    https://github.com/Charestlab/pyslicetime/blob/master/example_nipype.py.
    Output will have the suffix _stc appended. '''
    from slicetime.nipype_interface import SliceTime
    # Grab all preprocessed runs
    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='bold', return_type='filename')
    # Filter for resolution - here we exclude res-2
    bold_runsf = [b for b in bold_runs if 'res-2' not in b]
    tr_old = tr
    # Evenly spaced slice times spanning one TR, in descending order
    slicetimes = np.flip(np.arange(0, tr_old, tr_old / num_slices)).tolist()
    # Iterate over runs
    for run in bold_runsf:
        print('Upsampling:', run)
        st = SliceTime()
        st.inputs.in_file = run
        st.inputs.tr_old = tr_old
        st.inputs.tr_new = tr_new
        st.inputs.slicetimes = slicetimes
        st.run()
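For example, with hypothetical values tr_old = 2.0 and num_slices = 40 (spacing 0.05 s):

# np.arange(0, 2.0, 0.05)  -> [0.0, 0.05, ..., 1.95]
# np.flip(...) reverses it -> [1.95, 1.90, ..., 0.0]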

def smooth_bold(processed, smooth_num, subjects):
    ''' Smooth data prior to first-level analysis using FSL via nipype.
    Output will have the suffix _smooth appended. '''
    from nipype.interfaces.fsl import Smooth
Contributor:
Maybe I missed it, but you did not import nipype, right? And I don't think it is a dependency of glmsingle, so this should probably go into the requirements.

Also, not a Python expert, but do we have to go through the nipype FSL interface to smooth the data? Doesn't that presuppose that people have FSL installed on their system?
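One FSL-free alternative the reviewer's question points toward (an editorial sketch, not part of this PR; filenames are illustrative) is nilearn's smooth_img:

from nilearn.image import smooth_img

# Smooth an upsampled run with a 6 mm FWHM Gaussian kernel; this works
# on NIfTI files directly, with no FSL installation required
smoothed = smooth_img('sub-avl001_task-mytask_run-1_stc.nii.gz', fwhm=6)
smoothed.to_filename('sub-avl001_task-mytask_run-1_stc_smooth.nii.gz')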

    # Grab all upsampled runs
    bold_runs = processed.get(subject=subjects, extension='.nii.gz', suffix='stc', return_type='filename')
    # Smooth data before inputting to GLMsingle
    for run in bold_runs:
        print('Smoothing:', os.path.basename(run))
        # Run in the run's directory so nipype writes output next to the input
        path = os.path.dirname(run)
        os.chdir(path)
        sm = Smooth()
        sm.inputs.output_type = 'NIFTI_GZ'
        sm.inputs.in_file = run
        sm.inputs.fwhm = smooth_num
        sm.run()

def run_1st_level(processed, stimdur, tr_new, subjects):
    ''' Input several runs per subject to perform cross-validation in
    GLMdenoise (step 3 of GLMsingle). Imaging data and event data are
    appended to separate lists. '''
    data = []
    design = []
    # Loop over subjects
    for sub in subjects:
        # Grab all upsampled and smoothed runs
        bold_runs = processed.get(subject=sub, extension='.nii.gz', suffix='smooth', return_type='filename')
        # Grab the mask files for the bold runs
        mask_runs = processed.get(subject=sub, extension='.nii.gz', suffix='mask', return_type='filename', datatype='func')
        mask_runsf = [m for m in mask_runs if 'res-2' in m]
        # Pair each run with its mask
        masked_runs = dict(zip(bold_runs, mask_runsf))
        # Grab all GLMsingle-compatible event files
        eventfiles = processed.get(subject=sub, extension='.csv', suffix='glmsingle', return_type='filename')
        # Append all runs for a single subject session
        # fMRI data
        for run in masked_runs.keys():
            print('Using:', os.path.basename(masked_runs[run]), '\nto mask:', os.path.basename(run))
            r = nib.load(run).get_fdata().astype(np.float32)
            m = nib.load(masked_runs[run]).get_fdata().astype(np.float32)
            # Set all values outside the brain mask to 0
            r[m == 0] = 0
            data.append(r)
        # Design matrices
        for event in eventfiles:
            ev = pd.read_csv(event, header=None).to_numpy(dtype='float32')
            design.append(ev)
        # Initialize model
        gs = GLM_single()
        # Name output folder
        metadata = processed.parse_file_entities(bold_runs[0])
        outdir = os.path.join(os.path.dirname(bold_runs[0]), metadata['task'] + '_concat_glmsingle')
Contributor: same comment as above about using BIDS-friendly filenames (see the build_path sketch earlier).

        # Run GLMsingle
        results = gs.fit(
            design,
            data,
            stimdur,
            tr_new,
            outputdir=outdir)
        # Clear the lists before the next subject
        data.clear()
        design.clear()
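A note on the inputs to gs.fit, paraphrasing the GLMsingle documentation (treat the exact constraints as an assumption to verify against the library):

# Each element of `data` is a 4D array (X x Y x Z x T) and each element
# of `design` is a 2D array (T x conditions); T must match within each
# run's data/design pair, with one list entry per run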

def main():
    # Hardcoded file path to an existing BIDS dataset
    root = '/Users/ramihamati/Documents/EmoSal_test'
    # Stimulus duration (seconds)
    stimdur = 4
    # Desired repetition time (seconds)
    tr_new = 1
    # Desired smoothing kernel (mm FWHM)
    smooth_num = 6
    # Desired subjects
    subjects = ['avl001']
    # Execute functions; intermediate steps can be commented out if already complete
    raw, processed, tr, num_slices, total_dur = grab_data(root)
    #make_new_df(root, raw, subjects, total_dur)
    #upsample_bold(processed, tr, tr_new, num_slices, subjects)
    #smooth_bold(processed, smooth_num, subjects)
    run_1st_level(processed, stimdur, tr_new, subjects)

if __name__ == "__main__":
    # Execute only if run as a script
    main()