aramis-lab · AliceJoubert · Oct 23, 2024 · Oct 23, 2024
diff --git a/clinica/iotools/converters/aibl_to_bids/utils/clinical.py b/clinica/iotools/converters/aibl_to_bids/utils/clinical.py
@@ -153,12 +153,12 @@ def create_sessions_tsv_file(
     clinical_data_dir: Path,
     clinical_specifications_folder: Path,
 ) -> None:
-    """Extract the information regarding the sessions and save them in a tsv file.
+    """Extract the information regarding a subject's sessions and save it in a tsv file.
 
     Parameters
     ----------
     input_path : Path
-        The path to the input folder.
+        The path to the input folder (BIDS directory).
 
     clinical_data_dir : Path
         The path to the directory to the clinical data files.
@@ -211,7 +211,9 @@ def create_sessions_tsv_file(
 
                 elif field in list(df.columns.values) and field == "CDGLOBAL":
                     cd_global = df.loc[(df["RID"] == rid), field]
-                    cd_global[cd_global == -4] = "n/a"
+                    cd_global[
+                        cd_global == -4
+                    ] = "n/a"  # todo : do that mapping later, same for other fields
 
                 elif field in list(df.columns.values) and field == "DXCURREN":
                     dx_curren = df.loc[(df["RID"] == rid), field]
@@ -229,7 +231,13 @@ def create_sessions_tsv_file(
         exam_dates = _clean_exam_dates(
             rid, exam_date.to_list(), visit_code.to_list(), clinical_data_dir
         )
-        age = _compute_ages_at_each_exam(patient_date_of_birth.values[0], exam_dates)
+
+        if not patient_date_of_birth.empty:
+            age = _compute_ages_at_each_exam(
+                patient_date_of_birth.values[0], exam_dates
+            )
+        else:
+            age = "n/a"
 
         visit_code[visit_code == "bl"] = "M000"
         visit_code = visit_code.str.upper()
@@ -365,8 +373,13 @@ def _compute_ages_at_each_exam(
 
     for exam_date in exam_dates:
         exam_date = datetime.strptime(exam_date, "%m/%d/%Y")
-        delta = exam_date - date_of_birth
-        ages.append(round(delta.days / 365.25, 1))
+        delta = exam_date.year - date_of_birth.year
+        ages.append(delta)
+
+    # TODO (remark): is sub-year precision useful? The birth date only gives a year while the exam date is a full date, so a fractional age is inaccurate anyway.
+    #  We could give ages in whole years (int, >= 0) and just subtract the years.
+
+    # TODO: what happens if a date has the wrong format, or if the exam date precedes the birth date for some reason?
 
     return ages
 

diff --git a/test/unittests/iotools/converters/aibl_to_bids/test_aibl_utils.py b/test/unittests/iotools/converters/aibl_to_bids/test_aibl_utils.py
@@ -1,3 +1,7 @@
+from pathlib import Path
+
+import numpy as np
+import pandas as pd
 import pytest
 
 
@@ -40,3 +44,162 @@ def test_get_first_file_matching_pattern_error(tmp_path, pattern, msg):
 
     with pytest.raises(ValueError, match=msg):
         _get_first_file_matching_pattern(tmp_path, pattern)
+
+
+@pytest.mark.parametrize(
+    "birth_date, exam_date, age",
+    [
+        (
+            "/2000",  # birth date, given here in "/<year>" form
+            ["01/02/2000", "02/01/2000", "01/01/2001", "07/06/2003"],  # exam dates
+            [0, 0, 1, 3],  # expected ages: exam year minus birth year
+        ),
+        ("/2001", ["12/30/2003"], [2]),  # single exam: 2003 - 2001
+    ],
+)
+def test_compute_age(birth_date, exam_date, age):  # one age expected per exam date, in order
+    from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
+        _compute_ages_at_each_exam,
+    )
+
+    assert _compute_ages_at_each_exam(birth_date, exam_date) == age
+
+
+def build_sessions_spec(tmp_path: Path) -> Path:  # writes a small sessions.tsv spec, returns its folder
+    spec = pd.DataFrame(
+        {
+            "BIDS CLINICA": [  # presumably the column names used on the BIDS side - confirm
+                "examination_date",
+                "age",
+                "cdr_global",
+                "MMS",
+                "diagnosis",
+            ],
+            "AIBL": ["EXAMDATE", "PTDOB", "CDGLOBAL", "MMSCORE", "DXCURREN"],  # AIBL field per row
+            "AIBL location": [  # glob pattern of the CSV file holding each field
+                "aibl_neurobat_*.csv",
+                "aibl_ptdemog_*.csv",
+                "aibl_cdr_*.csv",
+                "aibl_mmse_*.csv",
+                "aibl_pdxconv_*.csv",
+            ],
+        }
+    )
+    spec.to_csv(tmp_path / "sessions.tsv", index=False, sep="\t")  # tab-separated, no index
+    return tmp_path
+
+
+def build_bids_dir(tmp_path: Path) -> Path:  # creates tmp_path/BIDS with two subjects, returns it
+    bids_dir = tmp_path / "BIDS"
+    bids_dir.mkdir()
+    (bids_dir / "sub-AIBL1" / "ses-M000").mkdir(parents=True)  # sub-AIBL1: one session
+    (bids_dir / "sub-AIBL100" / "ses-M000").mkdir(parents=True)  # sub-AIBL100: two sessions
+    (bids_dir / "sub-AIBL100" / "ses-M012").mkdir(parents=True)
+    return bids_dir
+
+
+def build_clinical_data(tmp_path: Path) -> Path:  # writes five fake AIBL CSVs, returns their folder
+    data_path = tmp_path / "clinical_data"
+    data_path.mkdir()
+
+    neuro = pd.DataFrame(
+        {
+            "RID": [1, 2, 12, 100, 100],  # RID 100 has two visits (bl and m12)
+            "VISCODE": ["bl", "bl", "bl", "bl", "m12"],
+            "EXAMDATE": [  # dates formatted as %m/%d/%Y
+                "01/01/2001",
+                "01/01/2002",
+                "01/01/2012",
+                "01/01/2100",
+                "12/01/2100",
+            ],
+        }
+    )
+    neuro.to_csv(data_path / "aibl_neurobat_230ct2024.csv", index=False)
+
+    ptdemog = pd.DataFrame(
+        {
+            "RID": [1, 2, 12, 101],  # no row for RID 100, so sub-AIBL100 has no birth date
+            "VISCODE": ["bl", "bl", "bl", "bl"],
+            "PTDOB": ["/1901", "/1902", "/1912", "/2001"],  # year-only values in "/<year>" form
+        }
+    )
+    ptdemog.to_csv(data_path / "aibl_ptdemog_230ct2024.csv", index=False)
+
+    cdr = pd.DataFrame(
+        {
+            "RID": [1, 2, 12, 100, 100],
+            "VISCODE": ["bl", "bl", "bl", "bl", "m12"],
+            "CDGLOBAL": [-4, 1, 0.5, 0, 0],  # -4 is replaced by "n/a" in create_sessions_tsv_file
+        }
+    )  # NOTE: CDGLOBAL values are floats
+    cdr.to_csv(data_path / "aibl_cdr_230ct2024.csv", index=False)
+
+    mmse = pd.DataFrame(
+        {
+            "RID": [1, 2, 12, 100, 100],
+            "VISCODE": ["bl", "bl", "bl", "bl", "m12"],
+            "MMSCORE": [-4, 10, 10, 30, 29],
+        }
+    )  # NOTE: MMSCORE values are ints
+    mmse.to_csv(data_path / "aibl_mmse_230ct2024.csv", index=False)
+
+    pdx = pd.DataFrame(
+        {
+            "RID": [1, 2, 12, 100, 100],
+            "VISCODE": ["bl", "bl", "bl", "bl", "m12"],
+            "DXCURREN": [-4, 0, 0, 1, 3],
+        }
+    )  # NOTE: DXCURREN values are ints
+    pdx.to_csv(data_path / "aibl_pdxconv_230ct2024.csv", index=False)
+
+    return data_path
+
+
+def test_create_sessions_tsv(tmp_path):  # end-to-end check of the per-subject sessions files
+    from clinica.iotools.converters.aibl_to_bids.utils.clinical import (
+        create_sessions_tsv_file,
+    )
+
+    bids_path = build_bids_dir(tmp_path)
+
+    create_sessions_tsv_file(
+        input_path=bids_path,
+        clinical_data_dir=build_clinical_data(tmp_path),
+        clinical_specifications_folder=build_sessions_spec(tmp_path),
+    )
+    result_sub100_list = list(bids_path.rglob("*sub-AIBL100_sessions.tsv"))  # searched recursively
+    result_sub1_list = list(bids_path.rglob("*sub-AIBL1_sessions.tsv"))
+
+    assert len(result_sub100_list) == 1  # exactly one sessions file per subject
+    assert len(result_sub1_list) == 1
+
+    result_sub100 = pd.read_csv(result_sub100_list[0], sep="\t")
+    result_sub1 = pd.read_csv(result_sub1_list[0], sep="\t")
+
+    expected_sub100 = pd.DataFrame(
+        {
+            "session_id": ["ses-M000", "ses-M012"],
+            "months": [0, 12],
+            "age": [np.nan, np.nan],  # RID 100 is missing from the ptdemog fixture
+            "MMS": [30, 29],
+            "cdr_global": [0.0, 0.0],
+            "diagnosis": ["CN", "AD"],  # DXCURREN is 1 and 3 in the fixture
+            "examination_date": ["01/01/2100", "12/01/2100"],
+        }
+    )
+
+    expected_sub1 = pd.DataFrame(
+        {
+            "session_id": ["ses-M000"],
+            "months": [0],
+            "age": [100],  # exam year 2001 minus birth year 1901
+            "MMS": [np.nan],  # fixture value is -4
+            "cdr_global": [np.nan],  # fixture value is -4
+            "diagnosis": [np.nan],  # fixture value is -4
+            "examination_date": ["01/01/2001"],
+        }
+    )
+
+    assert expected_sub1.equals(result_sub1)  # equals() is strict: same shape, values and column dtypes
+    assert expected_sub100.equals(result_sub100)