diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000..c423da2 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,29 @@ +version: 2 +jobs: + pytest: + docker: + - image: circleci/python:3.7.3 + steps: + - checkout + - run: + name: pytest + command: | + export PATH=/home/circleci/.local/bin:$PATH + mkdir test-results + pip install --user -r requirements.txt + set +e + coverage run -m pytest --junitxml=test-results/junit.xml + coverage report --include="./*" --omit="/home/circleci/.local/*" + coverage html --include="./*" --omit="/home/circleci/.local/*" + - store_test_results: + path: test-results + - store_artifacts: + path: test-results + - store_artifacts: + path: htmlcov + +workflows: + version: 2 + unit_tests: + jobs: + - pytest diff --git a/README.md b/README.md index 4d6a3f2..99a19c6 100644 --- a/README.md +++ b/README.md @@ -1 +1,90 @@ # CPAC_regtest_pack + +```bash +$ python cpac_correlations_wf.py --help +usage: cpac_correlations_wf.py [-h] [--old_outputs_path OLD_OUTPUTS_PATH] + [--new_outputs_path NEW_OUTPUTS_PATH] + [--s3_creds S3_CREDS] + [--replacements REPLACEMENTS] + [--corr_map CORR_MAP] + [--working_dir WORKING_DIR] + num_cores run_name + +positional arguments: + num_cores number of cores to use - will calculate correlations + in parallel if greater than 1 + run_name name for the correlations run + +optional arguments: + -h, --help show this help message and exit + --old_outputs_path OLD_OUTPUTS_PATH + path to a CPAC outputs directory - the folder + containing the participant-ID labeled directories + --new_outputs_path NEW_OUTPUTS_PATH + path to a CPAC outputs directory - the folder + containing the participant-ID labeled directories + --s3_creds S3_CREDS path to your AWS S3 credentials file + --replacements REPLACEMENTS + text file containing strings you wish to have removed + from the filepaths if they occur - place one on each + line + --corr_map CORR_MAP YAML file with already-calculated correlations, 
which + can be provided if you only want to generate the box + plots again + --working_dir WORKING_DIR + if you are correlating two working directories of a + single participant to check intermediates +``` + +```bash +$ python correlation_matrix.py --help +usage: correlation_matrix.py [-h] [--old_outputs_path OLD_OUTPUTS_PATH] + [--old_outputs_software {C-PAC,fmriprep}] + [--new_outputs_path NEW_OUTPUTS_PATH] + [--new_outputs_software {C-PAC,fmriprep}] + [--save] [--no-save] + [--subject_list SUBJECT_LIST] [--session SESSION] + [--feature_list FEATURE_LIST] + num_cores run_name + +Create a correlation matrix between two C-PAC output directories. + +positional arguments: + num_cores number of cores to use - will calculate correlations + in parallel if greater than 1 + run_name name for the correlations run + +optional arguments: + -h, --help show this help message and exit + --old_outputs_path OLD_OUTPUTS_PATH + path to an outputs directory - the folder containing + the participant-ID labeled directories + --old_outputs_software {C-PAC,fmriprep} + (default: fmriprep) + --new_outputs_path NEW_OUTPUTS_PATH + path to an outputs directory - the folder containing + the participant-ID labeled directories + --new_outputs_software {C-PAC,fmriprep} + (default: C-PAC) + --save save matrices & heatmap (default) + --no-save do not save matrices & heatmap + --subject_list SUBJECT_LIST + (default: subjects in OLD_OUTPUTS_PATH sorted by + session, subject ID). 
TODO: handle path to file + --session SESSION limit to a single given session (integer) + --feature_list FEATURE_LIST + TODO: handle path to file (default: ['GS', 'CSF', + 'WM', 'tCompCor0', 'aCompCor0', 'aCompCor1', + 'aCompCor2', 'aCompCor3', 'aCompCor4', 'FD']) + +The following features currently have available definitions to calculate Pearson's r between C-PAC and fmriprep: + +key feature name documentation link +-------- ------------------------ ---------------------------------------------------------------------------------- +aCompCor aCompCor https://fcp-indi.github.io/docs/user/nuisance.html#acompcor +CSF mean cerebrospinal fluid https://fcp-indi.github.io/docs/user/nuisance.html#mean-white-matter-csf +FD framewise displacement https://fcp-indi.github.io/docs/user/nuisance.html#regression-of-motion-parameters +GS global signal regression https://fcp-indi.github.io/docs/user/nuisance.html#global-signal-regression +tCompCor tCompCor https://fcp-indi.github.io/docs/user/nuisance.html#tcompcor +WM mean white matter https://fcp-indi.github.io/docs/user/nuisance.html#mean-white-matter-csf +``` \ No newline at end of file diff --git a/__init__.py b/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/configs/defaults.py b/configs/defaults.py new file mode 100644 index 0000000..d342736 --- /dev/null +++ b/configs/defaults.py @@ -0,0 +1,53 @@ +feature_headers = { + 'GS': { + 'name': 'global signal regression', + 'link': 'https://fcp-indi.github.io/docs/user/nuisance.html#' + 'global-signal-regression', + 'C-PAC': ['GlobalSignalMean0', 'GlobalSignal_mean'], + 'fmriprep': 'global_signal' + }, + 'CSF': { + 'name': 'mean cerebrospinal fluid', + 'link': 'https://fcp-indi.github.io/docs/user/nuisance.html#' + 'mean-white-matter-csf', + 'C-PAC': ['CerebrospinalFluidMean0', 'CerebrospinalFluid_mean'], + 'fmriprep': 'csf' + }, + 'WM': { + 'name': 'mean white matter', + 'link': 'https://fcp-indi.github.io/docs/user/nuisance.html#' + 'mean-white-matter-csf', + 
def cpac_sub(sub):
    """
    Function to convert a string from f"sub-{sub_number}{ses_letter}" to
    f"sub-{sub_number}_ses-{ses_number}"

    Parameter
    ---------
    sub: str
        label ending in a single lowercase session letter
        ("a" is session 1, "b" is session 2, ..., "z" is session 26)

    Returns
    -------
    cpac_sub: str

    Raises
    ------
    ValueError
        if ``sub`` is empty or does not end in a lowercase ASCII letter

    Example
    -------
    >>> print(cpac_sub("sub-0025427a"))
    sub-0025427_ses-1
    """
    # `"" in ascii_lowercase` is True, so the empty string needs its own check.
    if not sub or sub[-1] not in ascii_lowercase:
        raise ValueError(
            f"Expected a label ending in a lowercase session letter, "
            f"got {sub!r}"
        )
    return f"{sub[:-1]}_ses-{ascii_lowercase.index(sub[-1]) + 1}"


def fmriprep_sub(sub):
    """
    Function to convert a string from f"sub-{sub_number}_ses-{ses_number}" to
    f"sub-{sub_number}{ses_letter}"

    Parameter
    ---------
    sub: str
        label whose session number (1 through 26, any number of digits)
        follows the last "ses-"

    Returns
    -------
    fmriprep_sub: str

    Raises
    ------
    ValueError
        if the text after the last "ses-" is not an integer, or if the
        session number has no corresponding letter (outside 1..26)

    Example
    -------
    >>> print(fmriprep_sub("sub-0025427_ses-1"))
    sub-0025427a
    >>> print(fmriprep_sub("sub-0025427_ses-12"))
    sub-0025427l
    """
    subject = sub.split('_')[0]
    # Read the whole session number, not just the final digit, so that
    # two-digit sessions map to the right letter.
    session_number = int(sub.rsplit("ses-", 1)[-1])
    # Guard the lookup: 0 would wrap around to "z" through negative indexing
    # and anything above 26 has no letter.
    if not 1 <= session_number <= len(ascii_lowercase):
        raise ValueError(
            f"Session number must be between 1 and {len(ascii_lowercase)}, "
            f"got {session_number} in {sub!r}"
        )
    return f"{subject}{ascii_lowercase[session_number - 1]}"
def generate_subject_list_for_range(
    subject_start_stop,
    session_start_stop=None
):
    """
    Function to create a subject list for a given range. All values are
    inclusive.

    Parameters
    ----------
    subject_start_stop: 2-tuple of integers (start, stop) or list of specific
                        values

    session_start_stop: 2-tuple of integers (start, stop) or list of specific
                        values or None

    Returns
    -------
    List of strings, grouped by session first and by subject within each
    session. No "_ses-" suffix is added when ``session_start_stop`` is
    falsy.

    Example
    -------
    >>> generate_subject_list_for_range((25427,25428), (1,2))
    ['sub-0025427_ses-1', 'sub-0025428_ses-1', 'sub-0025427_ses-2', 'sub-0025428_ses-2']
    """
    # Expand the subjects once so every session reuses the same values.
    subjects = _expand_range(subject_start_stop)
    if session_start_stop:
        session_suffixes = [
            f'_ses-{ses}' for ses in _expand_range(session_start_stop)
        ]
    else:
        session_suffixes = ['']
    return [
        f'sub-00{sub}{suffix}'
        for suffix in session_suffixes
        for sub in subjects
    ]


def sessions_together(sub_list):
    """
    Function to sort by session then by subject

    Session labels made only of digits are compared as integers, so
    "ses-2" sorts before "ses-10". Labels whose text after the last "ses-"
    is not numeric are placed after the numeric sessions and compared as
    strings.

    Parameter
    ---------
    sub_list: list of str
        sorted in place

    Returns
    -------
    sub_list: list of str
        the same list object that was passed in

    Example
    -------
    >>> sub_list = [
    ...     'sub-0025427_ses-1', 'sub-0025427_ses-2', 'sub-0025428_ses-1'
    ... ]
    >>> print(sessions_together(sub_list))
    ['sub-0025427_ses-1', 'sub-0025428_ses-1', 'sub-0025427_ses-2']
    >>> print(sessions_together(['sub-1_ses-10', 'sub-1_ses-2']))
    ['sub-1_ses-2', 'sub-1_ses-10']
    """
    def _session_then_label(label):
        session = label.split("ses-")[-1]
        if session.isdecimal():
            # Numeric sessions sort first, by integer value.
            return (0, int(session), "", label)
        # The leading 1 keeps ints and strs from ever being compared.
        return (1, 0, session, label)

    sub_list.sort(key=_session_then_label)
    return sub_list


def _expand_range(tuple_or_list):
    """
    Function to expand an inclusive tuple to a range or return a literal list

    Parameter
    ---------
    tuple_or_list: 2-tuple of integers or list
        a tuple of exactly two integers is read as inclusive (start, stop);
        any other iterable is copied into a list unchanged

    Returns
    -------
    list
    """
    # `and` short-circuits, so len() is only called on tuples and other
    # iterables such as generators pass straight through to list().
    is_inclusive_pair = (
        isinstance(tuple_or_list, tuple)
        and len(tuple_or_list) == 2
        and all(isinstance(value, int) for value in tuple_or_list)
    )
    if is_inclusive_pair:
        start, stop = tuple_or_list
        return list(range(start, stop + 1))
    return list(tuple_or_list)
def _range_from_settings(config_settings, key):
    """
    Function to read a "start"/"stop" pair or a literal list from a config

    Parameters
    ----------
    config_settings: dict

    key: str

    Returns
    -------
    tuple, list or None
        ``(start, stop)`` if ``config_settings[key]`` contains both "start"
        and "stop", the value itself otherwise, or None if ``key`` is absent
    """
    if key not in config_settings:
        return None
    value = config_settings[key]
    if 'start' in value and 'stop' in value:
        return (value['start'], value['stop'])
    return value


def main(config_path, save_path=None):
    """
    Function to load a YAML config file and pass its settings to
    ``generate_heatmap``

    Any of "correlation_matrix", "var_list" (or "regressor_list" and / or
    "motion_list"), "subjects" and "sessions" that the config omits is
    taken from ``defaults``. An empty config file is treated as a config
    with no settings.

    Parameters
    ----------
    config_path: str
        path to the YAML config file

    save_path: str, optional, default=None
        passed through to ``generate_heatmap``
    """
    with open(config_path, 'r') as config_file:
        # safe_load returns None for an empty document
        config_settings = yaml.safe_load(config_file) or {}

    if 'correlation_matrix' in config_settings:
        correlation_matrix = reshape_corrs(
            config_settings['correlation_matrix']
        )
    else:
        correlation_matrix = defaults.correlation_matrix

    if 'var_list' in config_settings:
        var_list = config_settings['var_list']
    elif any(
        name in config_settings for name in ['regressor_list', 'motion_list']
    ):
        var_list = config_settings.get(
            'regressor_list', []
        ) + config_settings.get('motion_list', [])
    else:
        var_list = defaults.regressor_list + defaults.motion_list

    # Each key is tested for presence before it is indexed, so a config
    # without "subjects" or "sessions" reaches the defaults rather than
    # raising KeyError.
    subjects = _range_from_settings(config_settings, 'subjects')
    if subjects is None:
        subjects = (defaults.subjects['start'], defaults.subjects['stop'])
    sessions = _range_from_settings(config_settings, 'sessions')
    if sessions is None:
        sessions = (defaults.sessions['start'], defaults.sessions['stop'])

    generate_heatmap(
        correlation_matrix,
        var_list=var_list,
        sub_list=generate_subject_list_for_range(subjects, sessions),
        save_path=save_path
    )
match_filepaths old_files_dict = { 'alff_to_standard_smooth':