Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[TEST] AIBL-to-BIDS : add unit tests to functions related to creating sessions.tsv files #1347

Draft
wants to merge 2 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 19 additions & 6 deletions clinica/iotools/converters/aibl_to_bids/utils/clinical.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,12 +153,12 @@ def create_sessions_tsv_file(
clinical_data_dir: Path,
clinical_specifications_folder: Path,
) -> None:
"""Extract the information regarding the sessions and save them in a tsv file.
"""Extract the information regarding a subject sessions and save them in a tsv file.

Parameters
----------
input_path : Path
The path to the input folder.
The path to the input folder (BIDS directory).

clinical_data_dir : Path
The path to the directory to the clinical data files.
Expand Down Expand Up @@ -211,7 +211,9 @@ def create_sessions_tsv_file(

elif field in list(df.columns.values) and field == "CDGLOBAL":
cd_global = df.loc[(df["RID"] == rid), field]
cd_global[cd_global == -4] = "n/a"
cd_global[
cd_global == -4
] = "n/a" # todo : do that mapping later, same for other fields

elif field in list(df.columns.values) and field == "DXCURREN":
dx_curren = df.loc[(df["RID"] == rid), field]
Expand All @@ -229,7 +231,13 @@ def create_sessions_tsv_file(
exam_dates = _clean_exam_dates(
rid, exam_date.to_list(), visit_code.to_list(), clinical_data_dir
)
age = _compute_ages_at_each_exam(patient_date_of_birth.values[0], exam_dates)

if not patient_date_of_birth.empty:
age = _compute_ages_at_each_exam(
patient_date_of_birth.values[0], exam_dates
)
else:
age = "n/a"

visit_code[visit_code == "bl"] = "M000"
visit_code = visit_code.str.upper()
Expand Down Expand Up @@ -365,8 +373,13 @@ def _compute_ages_at_each_exam(

for exam_date in exam_dates:
exam_date = datetime.strptime(exam_date, "%m/%d/%Y")
delta = exam_date - date_of_birth
ages.append(round(delta.days / 365.25, 1))
delta = exam_date.year - date_of_birth.year
ages.append(delta)

# TODO (remark): is day-level precision useful here? The date of birth only carries a year
# while the exam date is a full date, so a precise difference is inaccurate anyway.
# Ages could instead be given in whole years (int, >= 0) by simply subtracting the years.

# TODO: decide what should happen if a date has the wrong format, or if the exam date is earlier than the birth date.

return ages

Expand Down
163 changes: 163 additions & 0 deletions test/unittests/iotools/converters/aibl_to_bids/test_aibl_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
from pathlib import Path

import numpy as np
import pandas as pd
import pytest


Expand Down Expand Up @@ -40,3 +44,162 @@ def test_get_first_file_matching_pattern_error(tmp_path, pattern, msg):

with pytest.raises(ValueError, match=msg):
_get_first_file_matching_pattern(tmp_path, pattern)


@pytest.mark.parametrize(
    ("birth_date", "exam_date", "age"),
    [
        (
            "/2000",
            ["01/02/2000", "02/01/2000", "01/01/2001", "07/06/2003"],
            [0, 0, 1, 3],
        ),
        ("/2001", ["12/30/2003"], [2]),
    ],
)
def test_compute_age(birth_date, exam_date, age):
    """Check the ages returned for one birth date and a list of exam dates.

    Each case provides a birth date written as "/<year>", a list of exam
    dates written as month/day/year, and the list of ages expected back
    (one entry per exam date, here the difference between the two years).
    """
    # Imported inside the test, like the other tests of this module.
    from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
        _compute_ages_at_each_exam as compute_ages,
    )

    computed_ages = compute_ages(birth_date, exam_date)

    assert computed_ages == age


def build_sessions_spec(tmp_path: Path) -> Path:
    """Write a small sessions specification table and return its folder.

    The table is saved as ``sessions.tsv`` (tab-separated, no index column)
    directly inside ``tmp_path``. Each row gives a BIDS field name, the name
    of the matching AIBL column, and a glob pattern for the AIBL file name.

    Parameters
    ----------
    tmp_path : Path
        Existing folder in which ``sessions.tsv`` is written.

    Returns
    -------
    Path
        ``tmp_path`` itself, i.e. the folder holding the specification file.
    """
    header = ["BIDS CLINICA", "AIBL", "AIBL location"]
    # One tuple per specification row, in the same order as ``header``.
    rows = [
        ("examination_date", "EXAMDATE", "aibl_neurobat_*.csv"),
        ("age", "PTDOB", "aibl_ptdemog_*.csv"),
        ("cdr_global", "CDGLOBAL", "aibl_cdr_*.csv"),
        ("MMS", "MMSCORE", "aibl_mmse_*.csv"),
        ("diagnosis", "DXCURREN", "aibl_pdxconv_*.csv"),
    ]
    specification = pd.DataFrame(rows, columns=header)
    specification.to_csv(tmp_path / "sessions.tsv", sep="\t", index=False)
    return tmp_path


def build_bids_dir(tmp_path: Path) -> Path:
    """Create an empty BIDS-like folder tree for two subjects.

    A ``BIDS`` folder is created inside ``tmp_path`` with one session folder
    for ``sub-AIBL1`` and two session folders for ``sub-AIBL100``.

    Parameters
    ----------
    tmp_path : Path
        Existing folder in which the ``BIDS`` folder is created.

    Returns
    -------
    Path
        The path to the created ``BIDS`` folder.
    """
    root = tmp_path / "BIDS"
    root.mkdir()

    sessions_per_subject = {
        "sub-AIBL1": ("ses-M000",),
        "sub-AIBL100": ("ses-M000", "ses-M012"),
    }
    for subject, sessions in sessions_per_subject.items():
        for session in sessions:
            # parents=True also creates the subject folder when needed.
            (root / subject / session).mkdir(parents=True)

    return root


def build_clinical_data(tmp_path: Path) -> Path:
    """Write five small AIBL-like clinical CSV files and return their folder.

    A ``clinical_data`` folder is created inside ``tmp_path`` and filled with
    one comma-separated file (no index column) per clinical table. Every
    table has ``RID`` and ``VISCODE`` columns plus one value column.

    Parameters
    ----------
    tmp_path : Path
        Existing folder in which the ``clinical_data`` folder is created.

    Returns
    -------
    Path
        The path to the created ``clinical_data`` folder.
    """
    data_path = tmp_path / "clinical_data"
    data_path.mkdir()

    # Visits shared by every table except the demographics one.
    rids = [1, 2, 12, 100, 100]
    viscodes = ["bl", "bl", "bl", "bl", "m12"]

    # File name -> table content, written in this order.
    tables = {
        "aibl_neurobat_230ct2024.csv": {
            "RID": rids,
            "VISCODE": viscodes,
            # Exam dates are written as %m/%d/%Y.
            "EXAMDATE": [
                "01/01/2001",
                "01/01/2002",
                "01/01/2012",
                "01/01/2100",
                "12/01/2100",
            ],
        },
        # The demographics table lists RID 101 and has no row for RID 100.
        "aibl_ptdemog_230ct2024.csv": {
            "RID": [1, 2, 12, 101],
            "VISCODE": ["bl", "bl", "bl", "bl"],
            "PTDOB": ["/1901", "/1902", "/1912", "/2001"],
        },
        # Mixing integers with 0.5 makes this a float column.
        "aibl_cdr_230ct2024.csv": {
            "RID": rids,
            "VISCODE": viscodes,
            "CDGLOBAL": [-4, 1, 0.5, 0, 0],
        },
        # Integer column.
        "aibl_mmse_230ct2024.csv": {
            "RID": rids,
            "VISCODE": viscodes,
            "MMSCORE": [-4, 10, 10, 30, 29],
        },
        # Integer column.
        "aibl_pdxconv_230ct2024.csv": {
            "RID": rids,
            "VISCODE": viscodes,
            "DXCURREN": [-4, 0, 0, 1, 3],
        },
    }

    for file_name, content in tables.items():
        pd.DataFrame(content).to_csv(data_path / file_name, index=False)

    return data_path


def test_create_sessions_tsv(tmp_path):
    """Check the sessions.tsv files written for two subjects.

    The BIDS tree, the clinical CSV files and the specification file are
    built under ``tmp_path`` with the helpers of this module, then
    ``create_sessions_tsv_file`` is run and the file produced for each
    subject is compared with the expected table.

    ``sub-AIBL100`` has no entry in the demographics table, so its age
    column is expected to be read back as missing. ``sub-AIBL1`` has -4 as
    MMSE, CDR and diagnosis values, which are expected to be read back as
    missing as well.
    """
    from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
        create_sessions_tsv_file,
    )

    bids_path = build_bids_dir(tmp_path)

    create_sessions_tsv_file(
        input_path=bids_path,
        clinical_data_dir=build_clinical_data(tmp_path),
        clinical_specifications_folder=build_sessions_spec(tmp_path),
    )
    result_sub100_list = list(bids_path.rglob("*sub-AIBL100_sessions.tsv"))
    result_sub1_list = list(bids_path.rglob("*sub-AIBL1_sessions.tsv"))

    # Exactly one sessions file is expected per subject.
    assert len(result_sub100_list) == 1, result_sub100_list
    assert len(result_sub1_list) == 1, result_sub1_list

    result_sub100 = pd.read_csv(result_sub100_list[0], sep="\t")
    result_sub1 = pd.read_csv(result_sub1_list[0], sep="\t")

    expected_sub100 = pd.DataFrame(
        {
            "session_id": ["ses-M000", "ses-M012"],
            "months": [0, 12],
            "age": [np.nan, np.nan],
            "MMS": [30, 29],
            "cdr_global": [0.0, 0.0],
            "diagnosis": ["CN", "AD"],
            "examination_date": ["01/01/2100", "12/01/2100"],
        }
    )

    expected_sub1 = pd.DataFrame(
        {
            "session_id": ["ses-M000"],
            "months": [0],
            "age": [100],
            "MMS": [np.nan],
            "cdr_global": [np.nan],
            "diagnosis": [np.nan],
            "examination_date": ["01/01/2001"],
        }
    )

    # assert_frame_equal reports which column/value differs, whereas a bare
    # `assert a.equals(b)` only fails with "assert False".
    # check_exact=True keeps the comparison free of any float tolerance.
    pd.testing.assert_frame_equal(result_sub1, expected_sub1, check_exact=True)
    pd.testing.assert_frame_equal(result_sub100, expected_sub100, check_exact=True)
Loading