From c79fb8027fd8185384ed132a3ffa4c5f0e13a2e2 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Mon, 23 Oct 2023 10:58:07 +0200 Subject: [PATCH 01/16] improvements of Genfi --- .../converters/genfi_to_bids/genfi_to_bids.py | 19 ++++++-- .../genfi_to_bids/genfi_to_bids_cli.py | 12 ++++- .../genfi_to_bids/genfi_to_bids_utils.py | 46 +++++++++++-------- 3 files changed, 51 insertions(+), 26 deletions(-) diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py index ae3b43e5b..26c8ae2c4 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py @@ -9,6 +9,7 @@ def convert_images( bids_dir: PathLike, path_to_clinical: Optional[PathLike], gif: bool, + path_to_clinical_tsv: Optional[PathLike], ) -> None: """Convert the entire dataset to BIDS. @@ -30,6 +31,10 @@ def convert_images( gif: bool If True, indicates the user wants to have the values of the gif parcellation + + path_to_clinical_tsv: PathLike, optional + Path to a tsv containing the data the additionnal data the user wants to have in the BIDS + If None, no additionnal data will be added. """ import os @@ -43,29 +48,33 @@ def convert_images( merge_imaging_data, read_imaging_data, write_bids, + check_clinical_path, ) + #check that if a clinical tsv is given, a path to the clinical data is given as well + if path_to_clinical_tsv: + check_clinical_path(path_to_clinical) # read the clinical data files if path_to_clinical: - df_demographics, df_imaging, df_clinical = find_clinical_data(path_to_clinical) + df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych = find_clinical_data(path_to_clinical) + # makes a df of the imaging data imaging_data = read_imaging_data(path_to_dataset) - + # complete the data extracted imaging_data = merge_imaging_data(imaging_data) # complete clinical data if path_to_clinical: df_clinical_complete = complete_clinical_data( - df_demographics, df_imaging, df_clinical + df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych ) - # intersect the data if path_to_clinical: df_complete = intersect_data(imaging_data, df_clinical_complete) else: df_complete = imaging_data # build the tsv - results = dataset_to_bids(df_complete, gif) + results = dataset_to_bids(df_complete, gif, path_to_clinical_tsv) write_bids( to=bids_dir, participants=results["participants"], diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py index b498afc51..edb1590e0 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py @@ -14,17 +14,25 @@ ) gif = click.option("-gif", is_flag=True, help="Add values from gif to session.tsv") - +clinical_data_tsv = click.option( + "-cdt", + "--clinical-data-tsv", + "clinical_data_tsv", + type=click.Path(exists=True, file_okay=True, resolve_path=True), + help="Path to a tsv containing additionnal clinical data you want to have in the BIDS", +) @click.command(name="genfi-to-bids") @cli_param.dataset_directory @cli_param.bids_directory @clinical_data_directory @gif +@clinical_data_tsv def cli( dataset_directory: PathLike, bids_directory: PathLike, clinical_data_directory: Optional[PathLike] = None, + clinical_data_tsv: Optional[PathLike] = None, gif: bool = False, ) -> None: """GENFI to BIDS converter. 
@@ -39,7 +47,7 @@ def cli( check_dcm2niix() - convert_images(dataset_directory, bids_directory, clinical_data_directory, gif) + convert_images(dataset_directory, bids_directory, clinical_data_directory, gif, clinical_data_tsv) _write_bidsignore(str(bids_directory)) cprint("Conversion to BIDS succeeded.") diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py index cb527bc5a..04a0ef8c4 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py @@ -7,7 +7,12 @@ import pydicom as pdcm from pandas import DataFrame - +def check_clinical_path(path_to_clinical_data: PathLike) -> None: + if path_to_clinical_data: + return + else: + raise ValueError("Missing a clinical_data_path.") + def find_dicoms(path_to_source_data: PathLike) -> Iterable[Tuple[PathLike, PathLike]]: """Find the dicoms in the given directory. @@ -150,6 +155,8 @@ def find_clinical_data( "FINAL*DEMOGRAPHICS*.xlsx", "FINAL*IMAGING*.xlsx", "FINAL*CLINICAL*.xlsx", + "FINAL*BIOSAMPLES*.xlsx", + "FINAL*NEUROPSYCH*.xlsx", ) ) @@ -168,7 +175,7 @@ def _read_file(data_file: PathLike) -> pd.DataFrame: def complete_clinical_data( - df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame + df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame, df_biosamples: DataFrame, df_neuropsych: DataFrame ) -> DataFrame: """Merges the different clincal dataframes into one. @@ -192,24 +199,13 @@ def complete_clinical_data( df_clinical_complete = df_imaging.merge( df_demographics, how="inner", on=merge_key ).drop(columns="diagnosis") + df_clinical_complete = df_clinical_complete.merge(df_biosamples, how="inner", on=merge_key) + df_clinical_complete = df_clinical_complete.merge(df_neuropsych, how="inner", on=merge_key) df_clinical = df_clinical.dropna(subset=merge_key) - return df_clinical_complete.merge( - df_clinical[ - [ - "blinded_code", - "blinded_site", - "visit", - "diagnosis", - "ftld-cdr-global", - "cdr-sob", - ] - ], - how="inner", - on=merge_key, - ) + return df_clinical_complete.merge(df_clinical, how="inner", on=merge_key) -def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFrame]: +def dataset_to_bids(complete_data_df: DataFrame, gif: bool, path_to_clinical_tsv: PathLike) -> Dict[str, DataFrame]: """Selects the data needed to write the participants, sessions, and scans tsvs. 
Parameters @@ -241,11 +237,23 @@ def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFra "genfi_ref.csv", ) df_ref = pd.read_csv(path_to_ref_csv, sep=";") - + #add additionnal data through csv + additionnal_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t") + + #hard written path soon to be changed + map_to_level_df = pd.read_csv("/Users/matthieu.joulot/Desktop/clinical_data_dest.tsv", sep="\t") + pre_addi_df = map_to_level_df.merge(additionnal_data_df, how="inner", on="data") + session_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="sessions"].values.tolist() + participants_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="participants"].values.tolist() + scan_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="scans"].values.tolist() + + addi_df=pd.DataFrame([participants_addi_list,session_addi_list, scan_addi_list]).transpose() + addi_df.columns = ["participants", "sessions", "scans"] if not gif: df_ref = df_ref.head(8) + df_to_write = pd.concat([df_ref, addi_df]) return { - col: complete_data_df.filter(items=list(df_ref[col])) + col: complete_data_df.filter(items=list(df_to_write[col])) for col in ["participants", "sessions", "scans"] } From 4972324cb63074730188f5afb204834cf68d6dc4 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Mon, 23 Oct 2023 12:06:45 +0200 Subject: [PATCH 02/16] Revert "improvements of Genfi" This reverts commit c79fb8027fd8185384ed132a3ffa4c5f0e13a2e2. --- .../converters/genfi_to_bids/genfi_to_bids.py | 19 ++------ .../genfi_to_bids/genfi_to_bids_cli.py | 12 +---- .../genfi_to_bids/genfi_to_bids_utils.py | 46 ++++++++----------- 3 files changed, 26 insertions(+), 51 deletions(-) diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py index 26c8ae2c4..ae3b43e5b 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids.py @@ -9,7 +9,6 @@ def convert_images( bids_dir: PathLike, path_to_clinical: Optional[PathLike], gif: bool, - path_to_clinical_tsv: Optional[PathLike], ) -> None: """Convert the entire dataset to BIDS. @@ -31,10 +30,6 @@ def convert_images( gif: bool If True, indicates the user wants to have the values of the gif parcellation - - path_to_clinical_tsv: PathLike, optional - Path to a tsv containing the data the additionnal data the user wants to have in the BIDS - If None, no additionnal data will be added. 
""" import os @@ -48,33 +43,29 @@ def convert_images( merge_imaging_data, read_imaging_data, write_bids, - check_clinical_path, ) - #check that if a clinical tsv is given, a path to the clinical data is given as well - if path_to_clinical_tsv: - check_clinical_path(path_to_clinical) # read the clinical data files if path_to_clinical: - df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych = find_clinical_data(path_to_clinical) - + df_demographics, df_imaging, df_clinical = find_clinical_data(path_to_clinical) # makes a df of the imaging data imaging_data = read_imaging_data(path_to_dataset) - + # complete the data extracted imaging_data = merge_imaging_data(imaging_data) # complete clinical data if path_to_clinical: df_clinical_complete = complete_clinical_data( - df_demographics, df_imaging, df_clinical, df_biosamples, df_neuropsych + df_demographics, df_imaging, df_clinical ) + # intersect the data if path_to_clinical: df_complete = intersect_data(imaging_data, df_clinical_complete) else: df_complete = imaging_data # build the tsv - results = dataset_to_bids(df_complete, gif, path_to_clinical_tsv) + results = dataset_to_bids(df_complete, gif) write_bids( to=bids_dir, participants=results["participants"], diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py index edb1590e0..b498afc51 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_cli.py @@ -14,25 +14,17 @@ ) gif = click.option("-gif", is_flag=True, help="Add values from gif to session.tsv") -clinical_data_tsv = click.option( - "-cdt", - "--clinical-data-tsv", - "clinical_data_tsv", - type=click.Path(exists=True, file_okay=True, resolve_path=True), - help="Path to a tsv containing additionnal clinical data you want to have in the BIDS", -) + @click.command(name="genfi-to-bids") @cli_param.dataset_directory @cli_param.bids_directory @clinical_data_directory @gif -@clinical_data_tsv def cli( dataset_directory: PathLike, bids_directory: PathLike, clinical_data_directory: Optional[PathLike] = None, - clinical_data_tsv: Optional[PathLike] = None, gif: bool = False, ) -> None: """GENFI to BIDS converter. @@ -47,7 +39,7 @@ def cli( check_dcm2niix() - convert_images(dataset_directory, bids_directory, clinical_data_directory, gif, clinical_data_tsv) + convert_images(dataset_directory, bids_directory, clinical_data_directory, gif) _write_bidsignore(str(bids_directory)) cprint("Conversion to BIDS succeeded.") diff --git a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py index 04a0ef8c4..cb527bc5a 100644 --- a/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py +++ b/clinica/iotools/converters/genfi_to_bids/genfi_to_bids_utils.py @@ -7,12 +7,7 @@ import pydicom as pdcm from pandas import DataFrame -def check_clinical_path(path_to_clinical_data: PathLike) -> None: - if path_to_clinical_data: - return - else: - raise ValueError("Missing a clinical_data_path.") - + def find_dicoms(path_to_source_data: PathLike) -> Iterable[Tuple[PathLike, PathLike]]: """Find the dicoms in the given directory. 
@@ -155,8 +150,6 @@ def find_clinical_data( "FINAL*DEMOGRAPHICS*.xlsx", "FINAL*IMAGING*.xlsx", "FINAL*CLINICAL*.xlsx", - "FINAL*BIOSAMPLES*.xlsx", - "FINAL*NEUROPSYCH*.xlsx", ) ) @@ -175,7 +168,7 @@ def _read_file(data_file: PathLike) -> pd.DataFrame: def complete_clinical_data( - df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame, df_biosamples: DataFrame, df_neuropsych: DataFrame + df_demographics: DataFrame, df_imaging: DataFrame, df_clinical: DataFrame ) -> DataFrame: """Merges the different clincal dataframes into one. @@ -199,13 +192,24 @@ def complete_clinical_data( df_clinical_complete = df_imaging.merge( df_demographics, how="inner", on=merge_key ).drop(columns="diagnosis") - df_clinical_complete = df_clinical_complete.merge(df_biosamples, how="inner", on=merge_key) - df_clinical_complete = df_clinical_complete.merge(df_neuropsych, how="inner", on=merge_key) df_clinical = df_clinical.dropna(subset=merge_key) - return df_clinical_complete.merge(df_clinical, how="inner", on=merge_key) + return df_clinical_complete.merge( + df_clinical[ + [ + "blinded_code", + "blinded_site", + "visit", + "diagnosis", + "ftld-cdr-global", + "cdr-sob", + ] + ], + how="inner", + on=merge_key, + ) -def dataset_to_bids(complete_data_df: DataFrame, gif: bool, path_to_clinical_tsv: PathLike) -> Dict[str, DataFrame]: +def dataset_to_bids(complete_data_df: DataFrame, gif: bool) -> Dict[str, DataFrame]: """Selects the data needed to write the participants, sessions, and scans tsvs. Parameters @@ -237,23 +241,11 @@ def dataset_to_bids(complete_data_df: DataFrame, gif: bool, path_to_clinical_tsv "genfi_ref.csv", ) df_ref = pd.read_csv(path_to_ref_csv, sep=";") - #add additionnal data through csv - additionnal_data_df = pd.read_csv(path_to_clinical_tsv, sep="\t") - - #hard written path soon to be changed - map_to_level_df = pd.read_csv("/Users/matthieu.joulot/Desktop/clinical_data_dest.tsv", sep="\t") - pre_addi_df = map_to_level_df.merge(additionnal_data_df, how="inner", on="data") - session_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="sessions"].values.tolist() - participants_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="participants"].values.tolist() - scan_addi_list = pre_addi_df["data"][pre_addi_df["dest"]=="scans"].values.tolist() - - addi_df=pd.DataFrame([participants_addi_list,session_addi_list, scan_addi_list]).transpose() - addi_df.columns = ["participants", "sessions", "scans"] + if not gif: df_ref = df_ref.head(8) - df_to_write = pd.concat([df_ref, addi_df]) return { - col: complete_data_df.filter(items=list(df_to_write[col])) + col: complete_data_df.filter(items=list(df_ref[col])) for col in ["participants", "sessions", "scans"] } From da7ab47a7473b67b37f4fbede538b35db2e10827 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Wed, 15 Nov 2023 16:00:16 +0100 Subject: [PATCH 03/16] factorize clinical data reading --- clinica/iotools/bids_utils.py | 4 ++- .../adni_modalities/adni_av45_fbb_pet.py | 16 ++++++------ .../adni_to_bids/adni_modalities/adni_dwi.py | 19 +++++--------- .../adni_modalities/adni_fdg_pet.py | 16 ++++++------ .../adni_modalities/adni_flair.py | 22 ++++++---------- .../adni_to_bids/adni_modalities/adni_fmri.py | 24 ++++++------------ .../adni_modalities/adni_pib_pet.py | 12 ++++----- .../adni_to_bids/adni_modalities/adni_t1.py | 25 ++++++------------- .../adni_modalities/adni_tau_pet.py | 14 +++++------ .../converters/adni_to_bids/adni_to_bids.py | 12 ++++----- .../converters/adni_to_bids/adni_utils.py | 17 ++++++++++--- 11 files changed, 77 
insertions(+), 104 deletions(-) diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py index f3059333a..03edc32d0 100644 --- a/clinica/iotools/bids_utils.py +++ b/clinica/iotools/bids_utils.py @@ -69,6 +69,8 @@ def create_participants_df( import pandas as pd from clinica.utils.stream import cprint + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + fields_bids = ["participant_id"] prev_location = "" @@ -111,7 +113,7 @@ def create_participants_df( if file_ext == ".xlsx": file_to_read = pd.read_excel(file_to_read_path, sheet_name=sheet) elif file_ext == ".csv": - file_to_read = pd.read_csv(file_to_read_path) + file_to_read = load_clinical_csv(clinical_data_dir, location.split(".")[0]) prev_location = location prev_sheet = sheet diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py index 49beb07e8..1db5056ef 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py @@ -44,12 +44,11 @@ def convert_adni_av45_fbb_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -89,6 +88,7 @@ def compute_av45_fbb_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, get_images_pet, + load_clinical_csv ) pet_amyloid_col = [ @@ -108,12 +108,10 @@ def compute_av45_fbb_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): pet_amyloid_dfs_list = [] # Loading needed .csv files - av45qc = pd.read_csv(path.join(csv_dir, "AV45QC.csv"), sep=",", low_memory=False) - amyqc = pd.read_csv(path.join(csv_dir, "AMYQC.csv"), sep=",", low_memory=False) - pet_meta_list = pd.read_csv( - path.join(csv_dir, "PET_META_LIST.csv"), sep=",", low_memory=False - ) - + av45qc = load_clinical_csv(csv_dir, "AV45QC") + amyqc = load_clinical_csv(csv_dir, "AMYQC") + pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") + for subj in subjs_list: # PET images metadata for subject diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py index 5e1392a81..9f1d8c556 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py @@ -44,12 +44,11 @@ def convert_adni_dwi( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -83,6 +82,7 @@ def compute_dwi_paths(source_dir, csv_dir, subjs_list, conversion_dir): from 
clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, visits_to_timepoints, + load_clinical_csv, ) dwi_col_df = [ @@ -100,18 +100,11 @@ def compute_dwi_paths(source_dir, csv_dir, subjs_list, conversion_dir): dwi_dfs_list = [] # Loading needed .csv files - adni_merge = pd.read_csv( - path.join(csv_dir, "ADNIMERGE.csv"), sep=",", low_memory=False - ) - - mayo_mri_qc = pd.read_csv( - path.join(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15.csv"), - sep=",", - low_memory=False, - ) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") + mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "DTI"] - mri_list = pd.read_csv(path.join(csv_dir, "MRILIST.csv"), sep=",", low_memory=False) + mri_list = load_clinical_csv(csv_dir, "MRILIST") # Selecting only DTI images that are not Multiband, processed or enhanced images mri_list = mri_list[mri_list.SEQUENCE.str.contains("dti", case=False, na=False)] diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py index d79aa324b..11be2cf98 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py @@ -86,13 +86,12 @@ def _convert_adni_fdg_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if subjects is None: - adni_merge = pd.read_csv( - Path(csv_dir) / "ADNIMERGE.csv", sep=",", low_memory=False - ) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") + subjects = list(adni_merge.PTID.unique()) cprint( "Calculating paths of FDG PET images. " @@ -227,7 +226,8 @@ def _load_df_with_column_check( csv_dir: Path, filename: str, required_columns: Set[str] ) -> pd.DataFrame: """Load the requested CSV file in a dataframe and check that the requested columns are present.""" - df = pd.read_csv(csv_dir / filename, sep=",", low_memory=False) + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + df = load_clinical_csv(csv_dir, filename) if not required_columns.issubset(set(df.columns)): raise ValueError( f"Missing column(s) from {filename} file." 
@@ -238,17 +238,17 @@ def _load_df_with_column_check( _get_pet_qc_df = partial( _load_df_with_column_check, - filename="PETQC.csv", + filename="PETQC", required_columns={"PASS", "RID"}, ) _get_qc_adni_3_df = partial( _load_df_with_column_check, - filename="PETC3.csv", + filename="PETC3", required_columns={"SCANQLTY", "RID", "SCANDATE"}, ) _get_meta_list_df = partial( _load_df_with_column_check, - filename="PET_META_LIST.csv", + filename="PET_META_LIST", required_columns={"Subject"}, ) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py index 3fce39c58..6d37e129b 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py @@ -44,12 +44,11 @@ def convert_adni_flair( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -83,6 +82,7 @@ def compute_flair_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, visits_to_timepoints, + load_clinical_csv, ) flair_col_df = [ @@ -101,19 +101,11 @@ def compute_flair_paths(source_dir, csv_dir, subjs_list, conversion_dir): flair_dfs_list = [] # Loading needed .csv files - adni_merge = pd.read_csv( - path.join(csv_dir, "ADNIMERGE.csv"), sep=",", low_memory=False - ) - - mayo_mri_qc = pd.read_csv( - path.join(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15.csv"), - sep=",", - low_memory=False, - ) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") + mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "AFL"] - - mri_list = pd.read_csv(path.join(csv_dir, "MRILIST.csv"), sep=",", low_memory=False) - + mri_list = load_clinical_csv(csv_dir, "MRILIST") + # Selecting FLAIR DTI images that are not MPR mri_list = mri_list[mri_list.SEQUENCE.str.contains("flair", case=False, na=False)] unwanted_sequences = ["_MPR_"] diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py index 4c9ed0642..8bdcec29c 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py @@ -44,12 +44,11 @@ def convert_adni_fmri( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -83,6 +82,7 @@ def compute_fmri_path(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, visits_to_timepoints, + load_clinical_csv, ) fmri_col = 
[ @@ -100,29 +100,21 @@ def compute_fmri_path(source_dir, csv_dir, subjs_list, conversion_dir): fmri_dfs_list = [] # Loading needed .csv files - adni_merge = pd.read_csv( - path.join(csv_dir, "ADNIMERGE.csv"), sep=",", low_memory=False - ) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - mayo_mri_qc = pd.read_csv( - path.join(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15.csv"), - sep=",", - low_memory=False, - ) + mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") + mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "fMRI"] mayo_mri_qc.columns = [x.upper() for x in mayo_mri_qc.columns] - mayo_mri_qc3 = pd.read_csv( - path.join(csv_dir, "MAYOADIRL_MRI_QUALITY_ADNI3.csv"), sep=",", low_memory=False - ) + mayo_mri_qc3 = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_QUALITY_ADNI3") mayo_mri_qc3 = mayo_mri_qc3[mayo_mri_qc3.SERIES_TYPE == "EPB"] # Concatenating visits in both QC files mayo_mri_qc = pd.concat( [mayo_mri_qc, mayo_mri_qc3], axis=0, ignore_index=True, sort=False ) - - mri_list = pd.read_csv(path.join(csv_dir, "MRILIST.csv"), sep=",", low_memory=False) + mri_list = load_clinical_csv(csv_dir, "MRILIST") # Selecting only fMRI images that are not Multiband mri_list = mri_list[ diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py index baebd8170..5216767e2 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py @@ -44,12 +44,11 @@ def convert_adni_pib_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -82,6 +81,7 @@ def compute_pib_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, get_images_pet, + load_clinical_csv, ) from clinica.utils.pet import Tracer @@ -101,10 +101,8 @@ def compute_pib_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): pet_pib_dfs_list = [] # Loading needed .csv files - pibqc = pd.read_csv(path.join(csv_dir, "PIBQC.csv"), sep=",", low_memory=False) - pet_meta_list = pd.read_csv( - path.join(csv_dir, "PET_META_LIST.csv"), sep=",", low_memory=False - ) + pibqc = load_clinical_csv(csv_dir, "PIBQC") + pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") for subj in subjs_list: diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py index 3184814fd..e1a2c573d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py @@ -44,12 +44,11 @@ def convert_adni_t1( from pandas.io import parsers - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = 
parsers.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -82,6 +81,7 @@ def compute_t1_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, visits_to_timepoints, + load_clinical_csv ) from clinica.utils.stream import cprint @@ -101,20 +101,11 @@ def compute_t1_paths(source_dir, csv_dir, subjs_list, conversion_dir): t1_dfs_list = [] # Loading needed .csv files - adni_merge = pd.read_csv( - path.join(csv_dir, "ADNIMERGE.csv"), sep=",", low_memory=False - ) - mprage_meta = pd.read_csv( - path.join(csv_dir, "MPRAGEMETA.csv"), sep=",", low_memory=False - ) - mri_quality = pd.read_csv( - path.join(csv_dir, "MRIQUALITY.csv"), sep=",", low_memory=False - ) - mayo_mri_qc = pd.read_csv( - path.join(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15.csv"), - sep=",", - low_memory=False, - ) + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") + mprage_meta = load_clinical_csv(csv_dir, "MPRAGEMETA") + mri_quality = load_clinical_csv(csv_dir, "MRIQUALITY") + mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") + # Keep only T1 scans mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "T1"] diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py index e8220a133..846da08d6 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py @@ -44,12 +44,11 @@ def convert_adni_tau_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids + from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv from clinica.utils.stream import cprint if not subjects: - adni_merge_path = path.join(csv_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path, sep=",", low_memory=False) + adni_merge =load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -81,6 +80,7 @@ def compute_tau_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, get_images_pet, + load_clinical_csv ) from clinica.utils.pet import Tracer @@ -100,11 +100,9 @@ def compute_tau_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): pet_tau_dfs_list = [] # Loading needed .csv files - tauqc = pd.read_csv(path.join(csv_dir, "TAUQC.csv"), sep=",", low_memory=False) - tauqc3 = pd.read_csv(path.join(csv_dir, "TAUQC3.csv"), sep=",", low_memory=False) - pet_meta_list = pd.read_csv( - path.join(csv_dir, "PET_META_LIST.csv"), sep=",", low_memory=False - ) + tauqc = load_clinical_csv(csv_dir, "TAUQC") + tauqc3 = load_clinical_csv(csv_dir, "TAUQC3") + pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") for subj in subjs_list: diff --git a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py index b42497b49..5377c12a8 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py +++ b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py @@ -10,7 +10,7 @@ def get_bids_subjs_info( ): from os import path - from pandas import read_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv # Read optional list of participants. 
subjects_list = ( @@ -20,10 +20,8 @@ def get_bids_subjs_info( ) # Load all participants from ADNIMERGE. - adni_merge_path = path.join(clinical_data_dir, "ADNIMERGE.csv") - participants = set( - read_csv(adni_merge_path, sep=",", usecols=["PTID"], squeeze=True).unique() - ) + adni_merge = load_clinical_csv(clinical_data_dir, "ADNIMERGE") + participants = adni_merge["PTID"].unique() # Filter participants if requested. participants = sorted( @@ -200,11 +198,11 @@ def convert_images( import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_t1 as adni_t1 import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_tau_pet as adni_tau from clinica.utils.stream import cprint + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv modalities = modalities or self.get_modalities_supported() - adni_merge_path = path.join(clinical_dir, "ADNIMERGE.csv") - adni_merge = pd.read_csv(adni_merge_path) + adni_merge = load_clinical_csv(clinical_dir, "ADNIMERGE") # Load a file with subjects list or compute all the subjects if subjs_list_path is not None: diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 53b6554fc..269df0d68 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -632,9 +632,7 @@ def correct_diagnosis_sc_adni3(clinical_data_dir, participants_df): from clinica.utils.stream import cprint diagnosis_dict = {1: "CN", 2: "MCI", 3: "AD"} - dxsum_df = pd.read_csv( - path.join(clinical_data_dir, "DXSUM_PDXCONV_ADNIALL.csv") - ).set_index(["PTID", "VISCODE2"]) + dxsum_df = load_clinical_csv(clinical_data_dir, "DXSUM_PDXCONV_ADNIALL").set_index(["PTID", "VISCODE2"]) missing_sc = participants_df[participants_df.original_study == "ADNI3"] participants_df.set_index("alternative_id_1", drop=True, inplace=True) for alternative_id in missing_sc.alternative_id_1.values: @@ -1582,3 +1580,16 @@ def remove_tmp_dmc_folder(bids_dir, image_id): tmp_dcm_folder_path = join(bids_dir, f"tmp_dcm_folder_{str(image_id).strip(' ')}") if exists(tmp_dcm_folder_path): rmtree(tmp_dcm_folder_path) + +def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: + from pathlib import Path + import re + + pattern = filename + '(_\d{1,2}[A-Za-z]{3}\d{4})?.csv' + for z in Path(clinical_dir).rglob("*.csv"): + if re.search(pattern, (z.name)): + adni_merge_path = z + try: + return pd.read_csv(adni_merge_path) + except: + raise ValueError(f"{filename}.csv was not found. 
Please check your data.") \ No newline at end of file From 6ab28236d8b70b50033083b8af6414c47c768410 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Wed, 15 Nov 2023 16:07:38 +0100 Subject: [PATCH 04/16] format with black and isort --- .../adni_modalities/adni_av45_fbb_pet.py | 10 ++++++---- .../adni_to_bids/adni_modalities/adni_dwi.py | 8 +++++--- .../adni_modalities/adni_fdg_pet.py | 10 +++++++--- .../adni_modalities/adni_flair.py | 10 ++++++---- .../adni_to_bids/adni_modalities/adni_fmri.py | 10 ++++++---- .../adni_modalities/adni_pib_pet.py | 6 ++++-- .../adni_to_bids/adni_modalities/adni_t1.py | 13 ++++++------- .../adni_modalities/adni_tau_pet.py | 10 ++++++---- .../converters/adni_to_bids/adni_to_bids.py | 2 +- .../converters/adni_to_bids/adni_utils.py | 19 ++++++++----------- 10 files changed, 55 insertions(+), 43 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py index 1db5056ef..842d885f9 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_av45_fbb_pet.py @@ -44,7 +44,10 @@ def convert_adni_av45_fbb_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -88,7 +91,7 @@ def compute_av45_fbb_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, get_images_pet, - load_clinical_csv + load_clinical_csv, ) pet_amyloid_col = [ @@ -111,9 +114,8 @@ def compute_av45_fbb_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): av45qc = load_clinical_csv(csv_dir, "AV45QC") amyqc = load_clinical_csv(csv_dir, "AMYQC") pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") - - for subj in subjs_list: + for subj in subjs_list: # PET images metadata for subject subject_pet_meta = pet_meta_list[pet_meta_list["Subject"] == subj] diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py index 9f1d8c556..d7f1513fc 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_dwi.py @@ -44,7 +44,10 @@ def convert_adni_dwi( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -81,8 +84,8 @@ def compute_dwi_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, - visits_to_timepoints, load_clinical_csv, + visits_to_timepoints, ) dwi_col_df = [ @@ -116,7 +119,6 @@ def compute_dwi_paths(source_dir, csv_dir, subjs_list, conversion_dir): ] for subj in subjs_list: - # Filter ADNIMERGE, MRI_LIST and QC for only one subject and sort the rows/visits by examination date adnimerge_subj = adni_merge[adni_merge.PTID == subj] adnimerge_subj = adnimerge_subj.sort_values("EXAMDATE") diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py 
b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py index 11be2cf98..a06a48a5d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fdg_pet.py @@ -86,12 +86,15 @@ def _convert_adni_fdg_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if subjects is None: adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") - + subjects = list(adni_merge.PTID.unique()) cprint( "Calculating paths of FDG PET images. " @@ -226,7 +229,8 @@ def _load_df_with_column_check( csv_dir: Path, filename: str, required_columns: Set[str] ) -> pd.DataFrame: """Load the requested CSV file in a dataframe and check that the requested columns are present.""" - from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + df = load_clinical_csv(csv_dir, filename) if not required_columns.issubset(set(df.columns)): raise ValueError( diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py index 6d37e129b..d5ba9f359 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_flair.py @@ -44,7 +44,10 @@ def convert_adni_flair( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -81,8 +84,8 @@ def compute_flair_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, - visits_to_timepoints, load_clinical_csv, + visits_to_timepoints, ) flair_col_df = [ @@ -105,7 +108,7 @@ def compute_flair_paths(source_dir, csv_dir, subjs_list, conversion_dir): mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "AFL"] mri_list = load_clinical_csv(csv_dir, "MRILIST") - + # Selecting FLAIR DTI images that are not MPR mri_list = mri_list[mri_list.SEQUENCE.str.contains("flair", case=False, na=False)] unwanted_sequences = ["_MPR_"] @@ -116,7 +119,6 @@ def compute_flair_paths(source_dir, csv_dir, subjs_list, conversion_dir): ] for subj in subjs_list: - # Filter ADNIMERGE, MRI_LIST and QC for only one subject and sort the rows/visits by examination date adnimerge_subj = adni_merge[adni_merge.PTID == subj] adnimerge_subj = adnimerge_subj.sort_values("EXAMDATE") diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py index 8bdcec29c..e0885cb65 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_fmri.py @@ -44,7 +44,10 @@ def convert_adni_fmri( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + 
paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -81,8 +84,8 @@ def compute_fmri_path(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, - visits_to_timepoints, load_clinical_csv, + visits_to_timepoints, ) fmri_col = [ @@ -103,7 +106,7 @@ def compute_fmri_path(source_dir, csv_dir, subjs_list, conversion_dir): adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") - + mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "fMRI"] mayo_mri_qc.columns = [x.upper() for x in mayo_mri_qc.columns] @@ -129,7 +132,6 @@ def compute_fmri_path(source_dir, csv_dir, subjs_list, conversion_dir): # We will convert the images for each subject in the subject list for subj in subjs_list: - # Filter ADNIMERGE, MRI_LIST and QC for only one subject and sort the rows/visits by examination date adnimerge_subj = adni_merge[adni_merge.PTID == subj] adnimerge_subj = adnimerge_subj.sort_values("EXAMDATE") diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py index 5216767e2..156b6e862 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_pib_pet.py @@ -44,7 +44,10 @@ def convert_adni_pib_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -105,7 +108,6 @@ def compute_pib_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") for subj in subjs_list: - # PET images metadata for subject subject_pet_meta = pet_meta_list[pet_meta_list["Subject"] == subj] diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py index e1a2c573d..86b85891c 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_t1.py @@ -44,7 +44,10 @@ def convert_adni_t1( from pandas.io import parsers - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: @@ -80,8 +83,8 @@ def compute_t1_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, + load_clinical_csv, visits_to_timepoints, - load_clinical_csv ) from clinica.utils.stream import cprint @@ -105,13 +108,12 @@ def compute_t1_paths(source_dir, csv_dir, subjs_list, conversion_dir): mprage_meta = load_clinical_csv(csv_dir, "MPRAGEMETA") mri_quality = load_clinical_csv(csv_dir, "MRIQUALITY") mayo_mri_qc = load_clinical_csv(csv_dir, "MAYOADIRL_MRI_IMAGEQC_12_08_15") - + # Keep only T1 scans mayo_mri_qc = mayo_mri_qc[mayo_mri_qc.series_type == "T1"] # We will convert the images for each subject in the subject list for subj in subjs_list: - # Filter ADNIMERGE, MPRAGE METADATA and QC for only one subject and sort the rows/visits by examination date adnimerge_subj = 
adni_merge[adni_merge.PTID == subj] adnimerge_subj = adnimerge_subj.sort_values("EXAMDATE") @@ -486,7 +488,6 @@ def select_scan_from_qc(scans_meta, mayo_mri_qc_subj, preferred_field_strength): not_preferred_scan = None if scans_meta.MagStrength.unique()[0] == 3.0: - id_list = scans_meta.ImageUID.unique() image_ids = ["I" + str(imageuid) for imageuid in id_list] int_ids = [int(imageuid) for imageuid in id_list] @@ -504,7 +505,6 @@ def select_scan_from_qc(scans_meta, mayo_mri_qc_subj, preferred_field_strength): images_not_rejected = images_qc[images_qc.series_quality < 4] if images_not_rejected.empty: - # There are no images that passed the qc, # so we'll try to see if there are other images without qc. # Otherwise, return None. @@ -593,7 +593,6 @@ def check_qc(scan, subject_id, visit_str, mri_quality_subj): # If QC exists and failed we keep the other scan (in case 2 scans were performed) if not qc.empty and qc.iloc[0].PASS != 1: - cprint("QC found but NOT passed") cprint( f"Subject {subject_id} for visit {visit_str} " diff --git a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py index 846da08d6..c7c5b0894 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py +++ b/clinica/iotools/converters/adni_to_bids/adni_modalities/adni_tau_pet.py @@ -44,11 +44,14 @@ def convert_adni_tau_pet( import pandas as pd - from clinica.iotools.converters.adni_to_bids.adni_utils import paths_to_bids, load_clinical_csv + from clinica.iotools.converters.adni_to_bids.adni_utils import ( + load_clinical_csv, + paths_to_bids, + ) from clinica.utils.stream import cprint if not subjects: - adni_merge =load_clinical_csv(csv_dir, "ADNIMERGE") + adni_merge = load_clinical_csv(csv_dir, "ADNIMERGE") subjects = list(adni_merge.PTID.unique()) cprint( @@ -80,7 +83,7 @@ def compute_tau_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): from clinica.iotools.converters.adni_to_bids.adni_utils import ( find_image_path, get_images_pet, - load_clinical_csv + load_clinical_csv, ) from clinica.utils.pet import Tracer @@ -105,7 +108,6 @@ def compute_tau_pet_paths(source_dir, csv_dir, subjs_list, conversion_dir): pet_meta_list = load_clinical_csv(csv_dir, "PET_META_LIST") for subj in subjs_list: - # PET images metadata for subject subject_pet_meta = pet_meta_list[pet_meta_list["Subject"] == subj] diff --git a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py index 5377c12a8..4b48de5c2 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_to_bids.py +++ b/clinica/iotools/converters/adni_to_bids/adni_to_bids.py @@ -197,8 +197,8 @@ def convert_images( import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_pib_pet as adni_pib import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_t1 as adni_t1 import clinica.iotools.converters.adni_to_bids.adni_modalities.adni_tau_pet as adni_tau - from clinica.utils.stream import cprint from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + from clinica.utils.stream import cprint modalities = modalities or self.get_modalities_supported() diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 269df0d68..a09ed032e 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -81,7 +81,6 @@ def visits_to_timepoints( 
# Then for images.Visit non matching the expected labels we find the closest date in visits list for visit in unique_visits: - image = (mri_list_subj[mri_list_subj[visit_field] == visit]).iloc[0] closest_visit = _get_closest_visit( @@ -436,7 +435,6 @@ def select_image_qc(id_list, mri_qc_subj): images_not_rejected = images_qc[images_qc.series_quality < 4] if images_not_rejected.empty: - # There are no images that passed the qc, # so we'll try to see if there are other images without qc. # Otherwise, return None. @@ -632,7 +630,9 @@ def correct_diagnosis_sc_adni3(clinical_data_dir, participants_df): from clinica.utils.stream import cprint diagnosis_dict = {1: "CN", 2: "MCI", 3: "AD"} - dxsum_df = load_clinical_csv(clinical_data_dir, "DXSUM_PDXCONV_ADNIALL").set_index(["PTID", "VISCODE2"]) + dxsum_df = load_clinical_csv(clinical_data_dir, "DXSUM_PDXCONV_ADNIALL").set_index( + ["PTID", "VISCODE2"] + ) missing_sc = participants_df[participants_df.original_study == "ADNI3"] participants_df.set_index("alternative_id_1", drop=True, inplace=True) for alternative_id in missing_sc.alternative_id_1.values: @@ -941,10 +941,8 @@ def create_adni_sessions_dict( # Iterate over the metadata files for location in files: - location = location.split("/")[0] if path.exists(path.join(clinical_data_dir, location)): - file_to_read_path = path.join(clinical_data_dir, location) cprint(f"\tReading clinical data file: {location}") @@ -1416,7 +1414,6 @@ def create_file(image, modality, bids_dir, mod_to_update): os.remove(im) if mod_to_update or not len(existing_im) > 0: - try: os.makedirs(output_path) except OSError: @@ -1554,7 +1551,6 @@ def check_two_dcm_folder(dicom_path, bids_folder, image_uid): dicom_list = glob(path.join(dicom_path, "*.dcm")) image_list = glob(path.join(dicom_path, f"*{image_uid}.dcm")) if len(dicom_list) != len(image_list): - # Remove the precedent tmp_dcm_folder if present. if os.path.exists(dest_path): shutil.rmtree(dest_path) @@ -1581,15 +1577,16 @@ def remove_tmp_dmc_folder(bids_dir, image_id): if exists(tmp_dcm_folder_path): rmtree(tmp_dcm_folder_path) + def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: - from pathlib import Path import re + from pathlib import Path - pattern = filename + '(_\d{1,2}[A-Za-z]{3}\d{4})?.csv' + pattern = filename + "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" for z in Path(clinical_dir).rglob("*.csv"): if re.search(pattern, (z.name)): adni_merge_path = z try: - return pd.read_csv(adni_merge_path) + return pd.read_csv(adni_merge_path, sep=",", low_memory=False) except: - raise ValueError(f"{filename}.csv was not found. Please check your data.") \ No newline at end of file + raise ValueError(f"{filename}.csv was not found. 
Please check your data.") From c7f03a84ee7e7f1cdfe61fe46ee381cf2bee72b6 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Wed, 15 Nov 2023 17:18:05 +0100 Subject: [PATCH 05/16] change the making of sessions.tsv --- .../converters/adni_to_bids/adni_utils.py | 58 ++++++++++--------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index a09ed032e..7686ea4d5 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -924,7 +924,9 @@ def create_adni_sessions_dict( bids_subjs_paths: a list with the path to all the BIDS subjects """ + import re from os import path + from pathlib import Path import pandas as pd @@ -942,38 +944,40 @@ def create_adni_sessions_dict( for location in files: location = location.split("/")[0] - if path.exists(path.join(clinical_data_dir, location)): - file_to_read_path = path.join(clinical_data_dir, location) - cprint(f"\tReading clinical data file: {location}") + pattern = location.split(".")[0] + "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" + for z in Path(clinical_data_dir).rglob("*.csv"): + if re.search(pattern, z.name): + file_to_read_path = z + cprint(f"\tReading clinical data file: {location}") - df_file = pd.read_csv(file_to_read_path, dtype=str) - df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() + df_file = pd.read_csv(file_to_read_path, dtype=str) + df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() - if not df_filtered.empty: - df_filtered = _compute_session_id(df_filtered, location) + if not df_filtered.empty: + df_filtered = _compute_session_id(df_filtered, location) - # Filter rows with invalid session IDs. - df_filtered.dropna(subset="session_id", inplace=True) + # Filter rows with invalid session IDs. + df_filtered.dropna(subset="session_id", inplace=True) - if location == "ADNIMERGE.csv": - df_filtered["AGE"] = df_filtered.apply( - lambda x: update_age(x), axis=1 + if location == "ADNIMERGE.csv": + df_filtered["AGE"] = df_filtered.apply( + lambda x: update_age(x), axis=1 + ) + df_subj_session = update_sessions_df( + df_subj_session, df_filtered, df_sessions, location ) - df_subj_session = update_sessions_df( - df_subj_session, df_filtered, df_sessions, location - ) - else: - cprint( - f"Clinical dataframe extracted from {location} is empty after filtering." - ) - dict_column_correspondence = dict( - zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) - ) - df_filtered.rename(columns=dict_column_correspondence, inplace=True) - df_filtered = df_filtered.loc[ - :, (~df_filtered.columns.isin(df_subj_session.columns)) - ] - df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) + else: + cprint( + f"Clinical dataframe extracted from {location} is empty after filtering." + ) + dict_column_correspondence = dict( + zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) + ) + df_filtered.rename(columns=dict_column_correspondence, inplace=True) + df_filtered = df_filtered.loc[ + :, (~df_filtered.columns.isin(df_subj_session.columns)) + ] + df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) if df_subj_session.empty: raise ValueError("Empty dataset detected. 
Clinical data cannot be extracted.") From 4d52e498af28484b88f0dd59ae54ef86245b167f Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 12:39:31 +0100 Subject: [PATCH 06/16] apply suggestion from review --- clinica/iotools/bids_utils.py | 7 ++++--- .../converters/adni_to_bids/adni_utils.py | 20 ++++++++++++++----- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/clinica/iotools/bids_utils.py b/clinica/iotools/bids_utils.py index 03edc32d0..9eafd4767 100644 --- a/clinica/iotools/bids_utils.py +++ b/clinica/iotools/bids_utils.py @@ -68,9 +68,8 @@ def create_participants_df( import numpy as np import pandas as pd - from clinica.utils.stream import cprint from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv - + from clinica.utils.stream import cprint fields_bids = ["participant_id"] prev_location = "" @@ -113,7 +112,9 @@ def create_participants_df( if file_ext == ".xlsx": file_to_read = pd.read_excel(file_to_read_path, sheet_name=sheet) elif file_ext == ".csv": - file_to_read = load_clinical_csv(clinical_data_dir, location.split(".")[0]) + file_to_read = load_clinical_csv( + clinical_data_dir, location.split(".")[0] + ) prev_location = location prev_sheet = sheet diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 7686ea4d5..86d300b9d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -1587,10 +1587,20 @@ def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: from pathlib import Path pattern = filename + "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" - for z in Path(clinical_dir).rglob("*.csv"): - if re.search(pattern, (z.name)): - adni_merge_path = z + # for z in Path(clinical_dir).rglob("*.csv"): + files_matching_pattern = [ + f for f in Path(clinical_dir).rglob("*.csv") if re.search(pattern, (f.name)) + ] + if len(files_matching_pattern) != 1: + raise IOError( + f"Expecting to find exactly one file in folder {clinical_dir} " + f"matching pattern {pattern}. {len(files_matching_pattern)} " + f"files were found instead : \n{'- '.join(files_matching_pattern)}" + ) try: - return pd.read_csv(adni_merge_path, sep=",", low_memory=False) + return pd.read_csv(files_matching_pattern[0], sep=",", low_memory=False) except: - raise ValueError(f"{filename}.csv was not found. Please check your data.") + raise ValueError( + f"File {files_matching_pattern} was found but could not " + "be loaded as a DataFrame. Please check your data." 
+ ) From 63cf1bcdd80ad0e4bd4309d4f891e4a7663922e6 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 14:54:44 +0100 Subject: [PATCH 07/16] nested to try to allow failure for cosntruction of sessions.tsv --- .../converters/adni_to_bids/adni_utils.py | 61 ++++++++----------- 1 file changed, 27 insertions(+), 34 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 86d300b9d..ab7935162 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -941,44 +941,38 @@ def create_adni_sessions_dict( # write line to get field_bids = sessions['BIDS CLINICA'] without the null values # Iterate over the metadata files - for location in files: location = location.split("/")[0] - pattern = location.split(".")[0] + "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" - for z in Path(clinical_data_dir).rglob("*.csv"): - if re.search(pattern, z.name): - file_to_read_path = z - cprint(f"\tReading clinical data file: {location}") - - df_file = pd.read_csv(file_to_read_path, dtype=str) - df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() - - if not df_filtered.empty: - df_filtered = _compute_session_id(df_filtered, location) + try: + df_file = load_clinical_csv(clinical_data_dir, location.split(".")[0]) + df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() + if not df_filtered.empty: + df_filtered = _compute_session_id(df_filtered, location) - # Filter rows with invalid session IDs. - df_filtered.dropna(subset="session_id", inplace=True) + # Filter rows with invalid session IDs. + df_filtered.dropna(subset="session_id", inplace=True) - if location == "ADNIMERGE.csv": - df_filtered["AGE"] = df_filtered.apply( - lambda x: update_age(x), axis=1 - ) - df_subj_session = update_sessions_df( - df_subj_session, df_filtered, df_sessions, location - ) - else: - cprint( - f"Clinical dataframe extracted from {location} is empty after filtering." - ) - dict_column_correspondence = dict( - zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) + if location == "ADNIMERGE.csv": + df_filtered["AGE"] = df_filtered.apply( + lambda x: update_age(x), axis=1 ) - df_filtered.rename(columns=dict_column_correspondence, inplace=True) - df_filtered = df_filtered.loc[ - :, (~df_filtered.columns.isin(df_subj_session.columns)) - ] - df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) - + df_subj_session = update_sessions_df( + df_subj_session, df_filtered, df_sessions, location + ) + else: + cprint( + f"Clinical dataframe extracted from {location} is empty after filtering." + ) + dict_column_correspondence = dict( + zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) + ) + df_filtered.rename(columns=dict_column_correspondence, inplace=True) + df_filtered = df_filtered.loc[ + :, (~df_filtered.columns.isin(df_subj_session.columns)) + ] + df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) + except: + pass if df_subj_session.empty: raise ValueError("Empty dataset detected. 
Clinical data cannot be extracted.") @@ -1587,7 +1581,6 @@ def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: from pathlib import Path pattern = filename + "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" - # for z in Path(clinical_dir).rglob("*.csv"): files_matching_pattern = [ f for f in Path(clinical_dir).rglob("*.csv") if re.search(pattern, (f.name)) ] From 6b284a5b7432eb906c7259a96ddf038352f450c2 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 15:01:40 +0100 Subject: [PATCH 08/16] modify unit-test to look for the right file --- .../adni_to_bids/adni_modalities/test_adni_fdg_pet.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/unittests/iotools/converters/adni_to_bids/adni_modalities/test_adni_fdg_pet.py b/test/unittests/iotools/converters/adni_to_bids/adni_modalities/test_adni_fdg_pet.py index 7fe1bd23c..9513a3fd4 100644 --- a/test/unittests/iotools/converters/adni_to_bids/adni_modalities/test_adni_fdg_pet.py +++ b/test/unittests/iotools/converters/adni_to_bids/adni_modalities/test_adni_fdg_pet.py @@ -75,7 +75,7 @@ def test_load_df_with_column_check(tmp_path, input_df, required_columns): input_df.to_csv(tmp_path / "data.csv", index=False) assert_frame_equal( - _load_df_with_column_check(tmp_path, "data.csv", required_columns), input_df + _load_df_with_column_check(tmp_path, "data", required_columns), input_df ) @@ -90,7 +90,7 @@ def test_load_df_with_column_check_errors(tmp_path, input_df): ValueError, match="Missing", ): - _load_df_with_column_check(tmp_path, "data.csv", {"foo", "foobaz"}) + _load_df_with_column_check(tmp_path, "data", {"foo", "foobaz"}) EXPECTED_FDG_DF_COLUMNS = [ From fa49acb64d13c6b398afa43fb292cf4b838f7d76 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 15:40:34 +0100 Subject: [PATCH 09/16] make nesting smaller and smarter using continue --- .../converters/adni_to_bids/adni_utils.py | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index ab7935162..97c5a515c 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -925,6 +925,7 @@ def create_adni_sessions_dict( """ import re + import warnings from os import path from pathlib import Path @@ -945,34 +946,33 @@ def create_adni_sessions_dict( location = location.split("/")[0] try: df_file = load_clinical_csv(clinical_data_dir, location.split(".")[0]) - df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() - if not df_filtered.empty: - df_filtered = _compute_session_id(df_filtered, location) + except IOError as e: + warnings.warn(e) + continue + df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() + if not df_filtered.empty: + df_filtered = _compute_session_id(df_filtered, location) - # Filter rows with invalid session IDs. - df_filtered.dropna(subset="session_id", inplace=True) + # Filter rows with invalid session IDs. + df_filtered.dropna(subset="session_id", inplace=True) - if location == "ADNIMERGE.csv": - df_filtered["AGE"] = df_filtered.apply( - lambda x: update_age(x), axis=1 - ) - df_subj_session = update_sessions_df( - df_subj_session, df_filtered, df_sessions, location - ) - else: - cprint( - f"Clinical dataframe extracted from {location} is empty after filtering." 
- ) - dict_column_correspondence = dict( - zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) - ) - df_filtered.rename(columns=dict_column_correspondence, inplace=True) - df_filtered = df_filtered.loc[ - :, (~df_filtered.columns.isin(df_subj_session.columns)) - ] - df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) - except: - pass + if location == "ADNIMERGE.csv": + df_filtered["AGE"] = df_filtered.apply(lambda x: update_age(x), axis=1) + df_subj_session = update_sessions_df( + df_subj_session, df_filtered, df_sessions, location + ) + else: + cprint( + f"Clinical dataframe extracted from {location} is empty after filtering." + ) + dict_column_correspondence = dict( + zip(df_sessions["ADNI"], df_sessions["BIDS CLINICA"]) + ) + df_filtered.rename(columns=dict_column_correspondence, inplace=True) + df_filtered = df_filtered.loc[ + :, (~df_filtered.columns.isin(df_subj_session.columns)) + ] + df_subj_session = pd.concat([df_subj_session, df_filtered], axis=1) if df_subj_session.empty: raise ValueError("Empty dataset detected. Clinical data cannot be extracted.") From a01505297ce06ba54f27d90e97f869a7715de9f9 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 15:47:18 +0100 Subject: [PATCH 10/16] remove warning and useless imports --- clinica/iotools/converters/adni_to_bids/adni_utils.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 97c5a515c..bf0b1727d 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -924,11 +924,6 @@ def create_adni_sessions_dict( bids_subjs_paths: a list with the path to all the BIDS subjects """ - import re - import warnings - from os import path - from pathlib import Path - import pandas as pd from clinica.utils.stream import cprint @@ -946,8 +941,7 @@ def create_adni_sessions_dict( location = location.split("/")[0] try: df_file = load_clinical_csv(clinical_data_dir, location.split(".")[0]) - except IOError as e: - warnings.warn(e) + except IOError: continue df_filtered = filter_subj_bids(df_file, location, bids_ids).copy() if not df_filtered.empty: @@ -1588,7 +1582,7 @@ def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: raise IOError( f"Expecting to find exactly one file in folder {clinical_dir} " f"matching pattern {pattern}. {len(files_matching_pattern)} " - f"files were found instead : \n{'- '.join(files_matching_pattern)}" + f"files were found instead : \n{'- '.join(str(files_matching_pattern))}" ) try: return pd.read_csv(files_matching_pattern[0], sep=",", low_memory=False) From 2f643ac011b164b85e2c276ea2cb93a45f9d2e6c Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 16:14:19 +0100 Subject: [PATCH 11/16] add docstring --- .../converters/adni_to_bids/adni_utils.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index bf0b1727d..36d4dd1bd 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -1571,6 +1571,23 @@ def remove_tmp_dmc_folder(bids_dir, image_id): def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: + """Load the clinical csv from ADNI. 
This function is able to find the csv in the + different known format available, the old format with just the name, and the new + format with the name and the date of download. + + Parameters + ---------- + clinical_dir: str + Directory containing the csv. + + filename: str + name of the file without the suffix. + + Returns + ------- + pd.DataFrame: + Dataframe corresponding to the filename. + """ import re from pathlib import Path From 85b5282441d39c88c76b8c8a9ae7891ba2f63128 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 16:45:34 +0100 Subject: [PATCH 12/16] add unit test --- .../converters/adni_to_bids/test_adni_utils.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index caa13f92a..ec22fe19b 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -339,3 +339,14 @@ def test_get_closest_visit(closest_visit_timepoints, image_acquisition_date, exp ), closest_visit_timepoints[expected], ) + + +@pytest.mark.parametrize( + "csv_name,csv_to_look_for", + [("adnimerge.csv", "adnimerge"), ("adnimerge_20Oct2023.csv", "adnimerge")], +) +def test_load_clinical_csv(tmp_path, input_df, csv_name, csv_to_look_for): + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + + input_df.to_csv(tmp_path / csv_name) + assert_frame_equal(load_clinical_csv(tmp_path, csv_to_look_for), input_df) From f88d2781a5d4062aa6e36fa0f79cc9890115bd19 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Thu, 16 Nov 2023 16:58:06 +0100 Subject: [PATCH 13/16] correct unit test --- .../converters/adni_to_bids/test_adni_utils.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index ec22fe19b..29da8a12a 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -341,6 +341,18 @@ def test_get_closest_visit(closest_visit_timepoints, image_acquisition_date, exp ) +@pytest.fixture +def input_df(): + return pd.DataFrame( + { + "foo": ["foo1", "foo2", "foo3"], + "bar": [1, 2, 3], + "baz": [True, False, False], + "foobar": [4, 5, 6], + } + ) + + @pytest.mark.parametrize( "csv_name,csv_to_look_for", [("adnimerge.csv", "adnimerge"), ("adnimerge_20Oct2023.csv", "adnimerge")], From 111033e52919e5c25e5cf76d94489ea156a3ce4a Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Fri, 17 Nov 2023 12:04:01 +0100 Subject: [PATCH 14/16] add errors tests --- .../converters/adni_to_bids/adni_utils.py | 2 +- .../adni_to_bids/test_adni_utils.py | 44 ++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/clinica/iotools/converters/adni_to_bids/adni_utils.py b/clinica/iotools/converters/adni_to_bids/adni_utils.py index 36d4dd1bd..703efb8ea 100644 --- a/clinica/iotools/converters/adni_to_bids/adni_utils.py +++ b/clinica/iotools/converters/adni_to_bids/adni_utils.py @@ -1605,6 +1605,6 @@ def load_clinical_csv(clinical_dir: str, filename: str) -> pd.DataFrame: return pd.read_csv(files_matching_pattern[0], sep=",", low_memory=False) except: raise ValueError( - f"File {files_matching_pattern} was found but could not " + f"File {str(files_matching_pattern[0])} was found but could not " "be loaded as 
a DataFrame. Please check your data." ) diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index 29da8a12a..0fc4ed87d 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -360,5 +360,47 @@ def input_df(): def test_load_clinical_csv(tmp_path, input_df, csv_name, csv_to_look_for): from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv - input_df.to_csv(tmp_path / csv_name) + input_df.to_csv(tmp_path / csv_name, index=False) assert_frame_equal(load_clinical_csv(tmp_path, csv_to_look_for), input_df) + + +@pytest.mark.parametrize( + "csv_to_look_for", + [("adnimerge")], +) +def test_load_clinical_csv_error(tmp_path, csv_to_look_for): + import re + + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + + pattern = "(_\d{1,2}[A-Za-z]{3}\d{4})?.csv" + with pytest.raises( + IOError, + match=re.escape( + f"Expecting to find exactly one file in folder {tmp_path} " + f"matching pattern {csv_to_look_for}{pattern}. 0 " + f"files were found instead : \n[- ]" + ), + ): + load_clinical_csv(tmp_path, csv_to_look_for) + + +@pytest.mark.parametrize( + "csv_to_look_for", + [("adnimerge")], +) +def test_load_clinical_csv_value_error(tmp_path, csv_to_look_for): + import re + + from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv + + with open(tmp_path / "adnimerge.csv", "w") as fp: + fp.write("col1,col2,col3\n1,2,3\n1,2,3,4") + + # input_df.to_csv(tmp_path / csv_name, sep="\t", index=False) + with pytest.raises( + ValueError, + match=f"File {tmp_path}/{csv_to_look_for}.csv was found but could not " + "be loaded as a DataFrame. Please check your data.", + ): + load_clinical_csv(tmp_path, csv_to_look_for) From 3cdc0c5b9647f35884c881f28b03f85d02cf72a5 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Fri, 17 Nov 2023 14:12:25 +0100 Subject: [PATCH 15/16] apply spellcheck --- .../adni_to_bids/test_adni_utils.py | 22 +++++-------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index 0fc4ed87d..5d62482dc 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -363,12 +363,7 @@ def test_load_clinical_csv(tmp_path, input_df, csv_name, csv_to_look_for): input_df.to_csv(tmp_path / csv_name, index=False) assert_frame_equal(load_clinical_csv(tmp_path, csv_to_look_for), input_df) - -@pytest.mark.parametrize( - "csv_to_look_for", - [("adnimerge")], -) -def test_load_clinical_csv_error(tmp_path, csv_to_look_for): +def test_load_clinical_csv_error(tmp_path, ): import re from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv @@ -378,18 +373,14 @@ def test_load_clinical_csv_error(tmp_path, csv_to_look_for): IOError, match=re.escape( f"Expecting to find exactly one file in folder {tmp_path} " - f"matching pattern {csv_to_look_for}{pattern}. 0 " + f"matching pattern adnimerge{pattern}. 
0 " f"files were found instead : \n[- ]" ), ): - load_clinical_csv(tmp_path, csv_to_look_for) + load_clinical_csv(tmp_path, "adnimerge") -@pytest.mark.parametrize( - "csv_to_look_for", - [("adnimerge")], -) -def test_load_clinical_csv_value_error(tmp_path, csv_to_look_for): +def test_load_clinical_csv_value_error(tmp_path): import re from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv @@ -397,10 +388,9 @@ def test_load_clinical_csv_value_error(tmp_path, csv_to_look_for): with open(tmp_path / "adnimerge.csv", "w") as fp: fp.write("col1,col2,col3\n1,2,3\n1,2,3,4") - # input_df.to_csv(tmp_path / csv_name, sep="\t", index=False) with pytest.raises( ValueError, - match=f"File {tmp_path}/{csv_to_look_for}.csv was found but could not " + match=f"File {tmp_path}/adnimerge.csv was found but could not " "be loaded as a DataFrame. Please check your data.", ): - load_clinical_csv(tmp_path, csv_to_look_for) + load_clinical_csv(tmp_path, "adnimerge") From 2638a60943e003efcf0e114409005bbd0175b0e8 Mon Sep 17 00:00:00 2001 From: JOULOT Matthieu Date: Fri, 17 Nov 2023 14:42:28 +0100 Subject: [PATCH 16/16] format --- .../iotools/converters/adni_to_bids/test_adni_utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py index 5d62482dc..3aaed7525 100644 --- a/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py +++ b/test/unittests/iotools/converters/adni_to_bids/test_adni_utils.py @@ -363,7 +363,10 @@ def test_load_clinical_csv(tmp_path, input_df, csv_name, csv_to_look_for): input_df.to_csv(tmp_path / csv_name, index=False) assert_frame_equal(load_clinical_csv(tmp_path, csv_to_look_for), input_df) -def test_load_clinical_csv_error(tmp_path, ): + +def test_load_clinical_csv_error( + tmp_path, +): import re from clinica.iotools.converters.adni_to_bids.adni_utils import load_clinical_csv