From c00e2f49b6ab781c4fb4c810bed631b7838faf6b Mon Sep 17 00:00:00 2001 From: Taha Abdullah Date: Thu, 5 Sep 2024 13:58:43 +0200 Subject: [PATCH] Moving Fastsurfer and Pytest environment setup to local_runner image, thus simplifying, and streamlining the workflow and improving execution time significantly. Adding Labelled output directories for each run Removing unnecessary import statements from pytest files Cleaning up code: - migrating from os.path.join to pathlib Path - changing str to Path - cleaning up comments and docstrings - ran ruff on code to fix code style --- .github/workflows/quicktest_runner.yaml | 62 ++------------ test/__init__.py | 13 ++- test/quick_test/common.py | 2 +- test/quick_test/conftest.py | 3 +- test/quick_test/test_errors_in_logfiles.py | 31 ++++--- test/quick_test/test_file_existence.py | 50 +++-------- test/quick_test/test_images.py | 60 +++++++------- test/quick_test/test_stats.py | 96 +++++++++++++--------- 8 files changed, 129 insertions(+), 188 deletions(-) diff --git a/.github/workflows/quicktest_runner.yaml b/.github/workflows/quicktest_runner.yaml index e9469e01..05798810 100644 --- a/.github/workflows/quicktest_runner.yaml +++ b/.github/workflows/quicktest_runner.yaml @@ -62,53 +62,11 @@ jobs: echo "Error: FreeSurfer installation directory does not exist at $FREESURFER_HOME" exit 1 fi - # Set up Python using setup-python@v3 action - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - # Check if FastSurfer environment already exists - - name: Check Environment - run: | - if [ ! -f "$MAMBAPATH" ]; then - echo "FastSurfer environment does not exist. Creating FastSurfer Environment..." - echo "HAS_MAMBA=true" >> $GITHUB_OUTPUT - else - echo "FastSurfer environment exists. Skipping FastSurfer Environment creation..." - echo "HAS_MAMBA=false" >> $GITHUB_OUTPUT - fi - id: check-environment - # Create FastSurfer environment if it does not exist - - name: Create FastSurfer Environment - uses: mamba-org/setup-micromamba@v1 - with: - environment-file: env/fastsurfer.yml - micromamba-root-path: $MAMBAROOT - micromamba-binary-path: $MAMBAPATH - cache-downloads: true - cache-environment: true - init-shell: none - if: steps.check-environment.outputs.HAS_MAMBA == 'true' - # Set up cache for python packages - - name: Cache Python packages - uses: actions/cache@v3 - with: - path: /home/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - # Install required python packages - - name: Install package - run: | - python -m pip install --progress-bar off --upgrade pip \ - setuptools wheel - python -m pip install --progress-bar off .[test] - # Run fastsurfer on list of subjects + # Run FastSurfer on test subjects - name: Run FastSurfer run: | echo "Running FastSurfer..." - export FREESURFER_HOME=/freesurfer - source $FREESURFER_HOME/SetUpFreeSurfer.sh + echo "Output will be saved in data/${GITHUB_SHA:0:7}" export FASTSURFER_HOME=$(pwd) export THIS_RUN_OUTDIR=${GITHUB_SHA:0:7} mkdir -p $SUBJECTS_DIR/$THIS_RUN_OUTDIR @@ -120,18 +78,10 @@ jobs: # Test fastsurfer output run-pytest: runs-on: self-hosted + if: always() needs: run-fastsurfer steps: - - name: Set up Python 3.10 - uses: actions/setup-python@v3 - with: - python-version: "3.10" - - name: Cache Python packages - uses: actions/cache@v3 - with: - path: /home/.cache/pip - key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} - restore-keys: | - ${{ runner.os }}-pip- - name: Run pytest - run: python -m pytest test/quick_test + run: | + source /venv-pytest/bin/activate + python -m pytest test/quick_test diff --git a/test/__init__.py b/test/__init__.py index 29780dc9..db6655bb 100644 --- a/test/__init__.py +++ b/test/__init__.py @@ -1,8 +1,5 @@ - - - -__all__ = [ # This is a list of modules that should be imported when using the import * syntax - 'test_file_existence', - 'test_error_messages', - 'test_errors' - ] +__all__ = [ # This is a list of modules that should be imported when using the import * syntax + "test_file_existence", + "test_error_messages", + "test_errors", +] diff --git a/test/quick_test/common.py b/test/quick_test/common.py index 3354cef8..a977824d 100644 --- a/test/quick_test/common.py +++ b/test/quick_test/common.py @@ -19,7 +19,7 @@ def load_test_subjects(): test_subjects = [] # Load the reference and test files - with open(os.path.join(subjects_dir, subjects_list), 'r') as file: + with open(os.path.join(subjects_dir, subjects_list), "r") as file: for line in file: filename = line.strip() logger.debug(filename) diff --git a/test/quick_test/conftest.py b/test/quick_test/conftest.py index d58772cb..7fe8806d 100644 --- a/test/quick_test/conftest.py +++ b/test/quick_test/conftest.py @@ -3,6 +3,7 @@ import pytest + @pytest.fixture def subjects_dir(): return Path(os.environ["SUBJECTS_DIR"]) @@ -20,4 +21,4 @@ def reference_dir(): @pytest.fixture def subjects_list(): - return os.environ["SUBJECTS_LIST"] + return Path(os.environ["SUBJECTS_LIST"]) diff --git a/test/quick_test/test_errors_in_logfiles.py b/test/quick_test/test_errors_in_logfiles.py index dab5f58e..1bd2fc09 100644 --- a/test/quick_test/test_errors_in_logfiles.py +++ b/test/quick_test/test_errors_in_logfiles.py @@ -1,4 +1,3 @@ -import os import pytest import yaml from pathlib import Path @@ -24,23 +23,23 @@ def load_errors(): # Open the error_file_path and read the errors and whitelist into arrays - error_file_path = os.path.join(Path(__file__).parent, "data/logfile.errors.yaml") + error_file_path = Path(__file__).parent / "data" / "logfile.errors.yaml" - with open(error_file_path, 'r') as file: + with open(error_file_path, "r") as file: data = yaml.safe_load(file) - errors = data.get('errors', []) - whitelist = data.get('whitelist', []) + errors = data.get("errors", []) + whitelist = data.get("whitelist", []) return errors, whitelist -def load_log_files(test_subject: str): +def load_log_files(test_subject: Path): """ Retrieve the log files in the given log directory. Parameters ---------- - test_subject : str + test_subject : Path Subject directory to test. Returns @@ -51,24 +50,24 @@ def load_log_files(test_subject: str): # Retrieve the log files in given log directory - log_directory = os.path.join(test_subject, "scripts") - log_files = [file for file in Path(log_directory).iterdir() if file.suffix == '.log'] + log_directory = test_subject / "scripts" + log_files = [file for file in Path(log_directory).iterdir() if file.suffix == ".log"] return log_files @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_errors(subjects_dir, test_dir, test_subject): +def test_errors(subjects_dir: Path, test_dir: Path, test_subject: Path): """ Test if there are any errors in the log files. Parameters ---------- - subjects_dir : str + subjects_dir : Path Subjects directory. - test_dir : str + test_dir : Path Tests directory. - test_subject : str + test_subject : Path Subject to test. Raises @@ -77,7 +76,7 @@ def test_errors(subjects_dir, test_dir, test_subject): If any of the keywords are in the log files. """ - test_subject = os.path.join(subjects_dir, test_dir, test_subject) + test_subject = subjects_dir / test_dir / test_subject log_files = load_log_files(test_subject) error_flag = False @@ -91,14 +90,14 @@ def test_errors(subjects_dir, test_dir, test_subject): rel_path = log_file.relative_to(subjects_dir) logger.debug(f"Checking file: {rel_path}") try: - with log_file.open('r') as file: + with log_file.open("r") as file: lines = file.readlines() lines_with_errors = [] for line_number, line in enumerate(lines, start=1): if any(error in line.lower() for error in errors): if not any(white in line.lower() for white in whitelist): # Get two lines before and after the current line - context = lines[max(0, line_number - 2):min(len(lines), line_number + 3)] + context = lines[max(0, line_number - 2) : min(len(lines), line_number + 3)] lines_with_errors.append((line_number, context)) # print(lines_with_errors) files_with_errors[rel_path] = lines_with_errors diff --git a/test/quick_test/test_file_existence.py b/test/quick_test/test_file_existence.py index 6e85d8d5..fe1122f1 100644 --- a/test/quick_test/test_file_existence.py +++ b/test/quick_test/test_file_existence.py @@ -1,44 +1,21 @@ -import os import pytest -import yaml from .common import * from logging import getLogger +from pathlib import Path + logger = getLogger(__name__) -# def get_files_from_yaml(file_path: str): -# """ -# Get the list of files from the YAML file. -# -# Parameters -# ---------- -# file_path : str -# Path to the YAML file. -# -# Returns -# ------- -# list -# List of files specified in the YAML file. -# """ -# -# # Open the file_path and read the files into an array -# with open(file_path, 'r') as file: -# data = yaml.safe_load(file) -# files = data.get('files', []) -# -# return files - - -def get_files_from_folder(folder_path: str): +def get_files_from_folder(folder_path: Path): """ Get the list of files in the directory relative to the folder path. Parameters ---------- - folder_path : str + folder_path : Path Path to the folder. Returns @@ -49,27 +26,26 @@ def get_files_from_folder(folder_path: str): # Get a list of all files in the folder recursively filenames = [] - for root, _, files in os.walk(folder_path): - for file in files: - filenames.append(os.path.relpath(os.path.join(root, file), folder_path)) + for file in Path(folder_path).rglob("*"): + filenames.append(str(file.relative_to(folder_path))) return filenames @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_file_existence(subjects_dir, test_dir, reference_dir, test_subject): +def test_file_existence(subjects_dir: Path, test_dir: Path, reference_dir: Path, test_subject: Path): """ Test the existence of files in the folder. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of the test directory. - reference_dir : str + reference_dir : Path Name of the reference directory. - test_subject : str + test_subject : Path Name of the test subject. Raises @@ -81,11 +57,11 @@ def test_file_existence(subjects_dir, test_dir, reference_dir, test_subject): print(test_subject) # Get reference files from the reference subject directory - reference_subject = os.path.join(subjects_dir, reference_dir, test_subject) + reference_subject = subjects_dir / reference_dir / test_subject reference_files = get_files_from_folder(reference_subject) # Get test list of files in the test subject directory - test_subject = os.path.join(subjects_dir, test_dir, test_subject) + test_subject = subjects_dir / test_dir / test_subject test_files = get_files_from_folder(test_subject) # Check if each file in the reference list exists in the test list diff --git a/test/quick_test/test_images.py b/test/quick_test/test_images.py index 129d324f..e37dd96b 100644 --- a/test/quick_test/test_images.py +++ b/test/quick_test/test_images.py @@ -1,10 +1,8 @@ -import os import pytest -from pathlib import Path import nibabel as nib import nibabel.cmdline.diff import numpy as np -import yaml +from pathlib import Path from collections import OrderedDict @@ -17,15 +15,15 @@ logger = getLogger(__name__) -def load_image(subject_path, image_name): +def load_image(subject_path: Path, image_name: Path): """ Load the image data using nibabel. Parameters ---------- - subject_path : str + subject_path : Path Path to the subject directory. - image_name : str + image_name : Path Name of the image file. Returns @@ -33,7 +31,7 @@ def load_image(subject_path, image_name): nibabel.nifti1.Nifti1Image Image data. """ - image_path = os.path.join(subject_path, "mri", image_name) + image_path = subject_path / "mri" / image_name image = nib.load(image_path) return image @@ -100,19 +98,19 @@ def compute_mean_square_error(test_data, reference_data): @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_image_headers(subjects_dir, test_dir, reference_dir, test_subject): +def test_image_headers(subjects_dir: Path, test_dir: Path, reference_dir: Path, test_subject: Path): """ Test the image headers by comparing the headers of the test and reference images. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of test directory. - reference_dir: str + reference_dir: Path Name of reference directory. - test_subject : str + test_subject : Path Name of the test subject. Raises @@ -122,9 +120,10 @@ def test_image_headers(subjects_dir, test_dir, reference_dir, test_subject): """ # Load images - test_subject = os.path.join(subjects_dir, test_dir, test_subject) + test_subject = subjects_dir / test_dir / test_subject test_image = load_image(test_subject, "brain.mgz") - reference_subject = os.path.join(subjects_dir, reference_dir, test_subject) + + reference_subject = subjects_dir / reference_dir / test_subject reference_image = load_image(reference_subject, "brain.mgz") # Get the image headers @@ -137,19 +136,19 @@ def test_image_headers(subjects_dir, test_dir, reference_dir, test_subject): @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_seg_data(subjects_dir, test_dir, reference_dir, test_subject): +def test_seg_data(subjects_dir: Path, test_dir: Path, reference_dir: Path, test_subject: Path): """ Test the segmentation data by calculating and comparing dice scores. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of test directory. - reference_dir : str + reference_dir : Path Name of reference directory. - test_subject : str + test_subject : Path Name of the test subject. Raises @@ -158,10 +157,10 @@ def test_seg_data(subjects_dir, test_dir, reference_dir, test_subject): If the dice score is not 0 for all classes """ - test_file = os.path.join(subjects_dir, test_dir, test_subject) + test_file = subjects_dir / test_dir / test_subject test_image = load_image(test_file, "aseg.mgz") - reference_subject = os.path.join(subjects_dir, reference_dir, test_subject) + reference_subject = subjects_dir / reference_dir / test_subject reference_image = load_image(reference_subject, "aseg.mgz") labels = np.unique([np.asarray(reference_image.dataobj), np.asarray(test_image.dataobj)]) @@ -174,8 +173,9 @@ def test_seg_data(subjects_dir, test_dir, reference_dir, test_subject): dscore = compute_dice_score(test_data, reference_data, labels) # Check the dice score - np.testing.assert_allclose(dscore, 0, atol=1e-6, rtol=1e-6, - err_msg=f"Dice scores are not within range for all classes") + np.testing.assert_allclose( + dscore, 0, atol=1e-6, rtol=1e-6, err_msg=f"Dice scores are not within range for all classes" + ) # assert dscore == 1, "Dice scores are not 1 for all classes" @@ -183,19 +183,19 @@ def test_seg_data(subjects_dir, test_dir, reference_dir, test_subject): @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_int_data(subjects_dir, test_dir, reference_dir, test_subject): +def test_int_data(subjects_dir: Path, test_dir: Path, reference_dir: Path, test_subject: Path): """ Test the intensity data by calculating and comparing the mean square error. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of test directory. - reference_dir : str + reference_dir : Path Name of reference directory. - test_subject : str + test_subject : Path Name of the test subject. Raises @@ -204,10 +204,10 @@ def test_int_data(subjects_dir, test_dir, reference_dir, test_subject): If the mean square error is not 0 """ - test_file = os.path.join(subjects_dir, test_dir, test_subject) + test_file = subjects_dir / test_dir / test_subject test_image = load_image(test_file, "brain.mgz") - reference_subject = os.path.join(subjects_dir, reference_dir, test_subject) + reference_subject = subjects_dir / reference_dir / test_subject reference_image = load_image(reference_subject, "brain.mgz") # Get the image data diff --git a/test/quick_test/test_stats.py b/test/quick_test/test_stats.py index 1cc300bf..68225ea5 100644 --- a/test/quick_test/test_stats.py +++ b/test/quick_test/test_stats.py @@ -1,12 +1,10 @@ from .conftest import * -import os from pathlib import Path import pandas as pd import pytest import yaml -import numpy as np from .common import * @@ -28,61 +26,78 @@ def thresholds(): Dictionary containing the thresholds """ - thresholds_file = os.path.join(Path(__file__).parent, "data/thresholds/aseg.stats.yaml") + thresholds_file = Path(__file__).parent / "data/thresholds/aseg.stats.yaml" # Open the file_path and read the thresholds into a dictionary - with open(thresholds_file, 'r') as file: + with open(thresholds_file, "r") as file: data = yaml.safe_load(file) - default_threshold = data.get('default_threshold') - thresholds = data.get('thresholds', {}) + default_threshold = data.get("default_threshold") + thresholds = data.get("thresholds", {}) return default_threshold, thresholds -def load_stats_file(test_subject: str): +def load_stats_file(test_subject: Path): + """ + Load the stats file from the given file path. - files = os.listdir(os.path.join(test_subject, "stats")) + Parameters + ---------- + test_subject : Path + Path to the test subject. + + Returns + ------- + stats_file : Path + """ + + files = os.listdir(test_subject / "stats") if "aseg.stats" in files: - return os.path.join(test_subject, "stats", "aseg.stats") + return test_subject / "stats" / "aseg.stats" elif "aparc+DKT.stats" in files: - return os.path.join(test_subject, "stats", "aparc+DKT.stats") + return test_subject / "stats" / "aparc+DKT.stats" else: raise ValueError("Unknown stats file") -def load_structs(test_file: str): +def load_structs(test_file: Path): """ Load the structs from the given file path. + Parameters + ---------- + test_file : Path + Path to the test file. + Returns ------- structs : list List of structs. """ - if "aseg" in test_file: - structs_file = os.path.join(Path(__file__).parent, "data/thresholds/aseg.stats.yaml") - elif "aparc+DKT" in test_file: - structs_file = os.path.join(Path(__file__).parent, "data/thresholds/aparc+DKT.stats.yaml") + if "aseg" in str(test_file): + structs_file = Path(__file__).parent / "data/thresholds/aseg.stats.yaml" + elif "aparc+DKT" in str(test_file): + structs_file = Path(__file__).parent / "data/thresholds/aparc+DKT.stats.yaml" else: raise ValueError("Unknown test file") # Open the file_path and read the structs: into a list - with open(structs_file, 'r') as file: + with open(structs_file, "r") as file: data = yaml.safe_load(file) - structs = data.get('structs', []) + structs = data.get("structs", []) return structs -def read_measure_stats(file_path: str): +def read_measure_stats(file_path: Path): """ Read the measure stats from the given file path. Parameters ---------- - file_path : str + file_path : Path Path to the stats file. Returns @@ -97,7 +112,7 @@ def read_measure_stats(file_path: str): measurements = {} # Retrieve lines starting with "# Measure" from the stats file - with open(file_path, 'r') as file: + with open(file_path, "r") as file: # Read each line in the file for i, line in enumerate(file, 1): # Check if the line starts with "# ColHeaders" @@ -117,13 +132,13 @@ def read_measure_stats(file_path: str): return measure, measurements -def read_table(file_path: str): +def read_table(file_path: Path): """ Read the table from the given file path. Parameters ---------- - file_path : str + file_path : Path Path to the stats file. Returns @@ -135,10 +150,10 @@ def read_table(file_path: str): table_start = 0 columns = [] - file_path = os.path.join(file_path, "stats", "aseg.stats") + file_path = file_path / "stats" / "aseg.stats" # Retrieve stats table from the stats file - with open(file_path, 'r') as file: + with open(file_path, "r") as file: # Read each line in the file for i, line in enumerate(file, 1): # Check if the line starts with "# ColHeaders" @@ -148,7 +163,7 @@ def read_table(file_path: str): # Read the reference table into a pandas dataframe table = pd.read_table(file_path, skiprows=table_start, sep="\s+", header=None) - table_numeric = table.apply(pd.to_numeric, errors='coerce') + table_numeric = table.apply(pd.to_numeric, errors="coerce") table.columns = columns table.set_index(columns[0], inplace=True) @@ -156,17 +171,17 @@ def read_table(file_path: str): @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_measure_exists(subjects_dir, test_dir, test_subject): +def test_measure_exists(subjects_dir: Path, test_dir: Path, test_subject: Path): """ Test if the measure exists in the stats file. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of the test directory. - test_subject : str + test_subject : Path Name of the test subject. Raises @@ -175,7 +190,7 @@ def test_measure_exists(subjects_dir, test_dir, test_subject): If the measure does not exist in the stats file. """ - test_subject = os.path.join(subjects_dir, test_dir, test_subject) + test_subject = subjects_dir / test_dir / test_subject test_file = load_stats_file(test_subject) data = read_measure_stats(test_file) ref_data = read_measure_stats(test_file) @@ -183,8 +198,9 @@ def test_measure_exists(subjects_dir, test_dir, test_subject): for struct in load_structs(test_file): if struct not in data[1]: - errors.append(f"for struct {struct} the value {data[1].get(struct)} is not close to " - f"{ref_data[1].get(struct)}") + errors.append( + f"for struct {struct} the value {data[1].get(struct)} is not close to " f"{ref_data[1].get(struct)}" + ) stats_data = read_measure_stats(test_file) @@ -193,20 +209,22 @@ def test_measure_exists(subjects_dir, test_dir, test_subject): @pytest.mark.parametrize("test_subject", load_test_subjects()) -def test_tables(subjects_dir, test_dir, reference_dir, test_subject, thresholds): +def test_tables(subjects_dir: Path, test_dir: Path, reference_dir: Path, test_subject: Path, thresholds): """ Test if the tables are within the threshold. Parameters ---------- - subjects_dir : str + subjects_dir : Path Path to the subjects directory. - test_dir : str + test_dir : Path Name of the test directory. - reference_dir : str + reference_dir : Path Name of the reference directory. - test_subject : str + test_subject : Path Name of the test subject. + thresholds : tuple + Tuple containing the default threshold and the thresholds. Raises ------ @@ -215,10 +233,10 @@ def test_tables(subjects_dir, test_dir, reference_dir, test_subject, thresholds) """ # Load the test and reference tables - test_file = os.path.join(subjects_dir, test_dir, test_subject) + test_file = subjects_dir / test_dir / test_subject test_table = read_table(test_file) - reference_subject = os.path.join(subjects_dir, reference_dir, test_subject) + reference_subject = subjects_dir / reference_dir / test_subject ref_table = read_table(reference_subject) # Load the thresholds