From b8ed8b00c6876949bf293c2150779fa2b7b3c124 Mon Sep 17 00:00:00 2001 From: Jermiah Joseph Date: Mon, 19 Feb 2024 16:37:04 -0500 Subject: [PATCH] refactor: improve dockerfile by removing caches and comment blocks --- Dockerfile | 15 +---- README.md | 42 ++++++++++++-- src/nbiatoolkit/nbia.py | 121 +++++++++++++++++----------------------- 3 files changed, 90 insertions(+), 88 deletions(-) diff --git a/Dockerfile b/Dockerfile index 238f782..6150a39 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,11 +1,3 @@ -# This Dockerfile will create a container that builds the nbiatoolkit package -# using the code in this repository -# -# To build the container, run the following command from the root of the -# repository: -# docker build -t nbiatoolkit . - - FROM python:3.12-slim LABEL maintainer="Jermiah Joseph jermiahjoseph98@gmail.com" @@ -21,13 +13,10 @@ COPY . /nbiatoolkit WORKDIR /nbiatoolkit # install nbiatoolkit -RUN pip install --upgrade pip -RUN pip install . +RUN pip install --no-cache-dir --upgrade pip +RUN pip install --no-cache-dir . RUN NBIAToolkit --help # On run, open a bash shell CMD ["/bin/bash"] - -# to run this container in terminal mode, use the following command: -# docker run -it --rm nbiatoolkit diff --git a/README.md b/README.md index b608c1b..c01edcf 100644 --- a/README.md +++ b/README.md @@ -3,18 +3,24 @@ [![PyTests](https://github.com/jjjermiah/nbia-toolkit/actions/workflows/main.yml/badge.svg)](https://github.com/jjjermiah/nbia-toolkit/actions/workflows/main.yml) [![Documentation Status](https://readthedocs.org/projects/nbia-toolkit/badge/?version=latest)](https://nbia-toolkit.readthedocs.io/en/latest/?badge=latest) [![codecov](https://codecov.io/gh/jjjermiah/nbia-toolkit/graph/badge.svg?token=JKREY71D0R)](https://codecov.io/gh/jjjermiah/nbia-toolkit) +[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) [![Python version](https://img.shields.io/pypi/pyversions/nbiatoolkit.svg)](https://img.shields.io/pypi/pyversions/nbiatoolkit.svg) +[![CodeFactor](https://www.codefactor.io/repository/github/jjjermiah/nbia-toolkit/badge)](https://www.codefactor.io/repository/github/jjjermiah/nbia-toolkit) + +![GitHub release (latest by date)](https://img.shields.io/github/v/release/jjjermiah/nbia-toolkit) [![PyPI version](https://badge.fury.io/py/nbiatoolkit.svg)](https://badge.fury.io/py/nbiatoolkit) [![Downloads](https://static.pepy.tech/badge/nbiatoolkit)](https://pepy.tech/project/nbiatoolkit) [![PyPI - Downloads](https://img.shields.io/pypi/dm/nbiatoolkit.svg?label=pypi%20downloads)](https://pypi.org/project/nbiatoolkit/) ![GitHub repo size](https://img.shields.io/github/repo-size/jjjermiah/nbia-toolkit) [![Docker Pulls](https://img.shields.io/docker/pulls/jjjermiah/nbiatoolkit)](https://hub.docker.com/r/jjjermiah/nbiatoolkit) -[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) + ![GitHub milestone details](https://img.shields.io/github/milestones/progress-percent/jjjermiah/nbia-toolkit/1?style=flat-square&label=1.0.0%20Stable%20Release%20Milestone&link=https%3A%2F%2Fgithub.com%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1)![GitHub milestone details](https://img.shields.io/github/milestones/progress/jjjermiah/nbia-toolkit/1?style=flat-square&label=%20&link=https%3A%2F%2Fgithub.com%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1) +[![GitHub issues](https://img.shields.io/github/issues/jjjermiah/nbia-toolkit)](https://github.com/jjjermiah/nbia-toolkit/issues) +![GitHub last commit](https://img.shields.io/github/last-commit/jjjermiah/nbia-toolkit) @@ -44,21 +50,45 @@ It is made available via PyPI and can be installed using pip: pip install nbiatoolkit ``` +## Python Usage +Using a context manager, you can easily access the NBIA database and query for metadata on collections, patients, studies, and series. + +``` python +from nbiatoolkit import NBIAClient + +with NBIAClient() as client: + # Get a list of collections + collections = client.getCollections() + print(collections) + + # Get a list of patients in a collection + patients = client.getPatients(Collection="TCGA-KIRC") + print(patients) + + # Get a list of studies for a patient + studies = client.getStudies(PatientID="TCGA-BP-4989") + print(studies) + + # Get a list of series for a study + series = client.getSeries(StudyInstanceUID=studies[0]["StudyInstanceUID"]) + print(series[0:5]) +``` + ## CLI Usage For quick access to the NBIA, the toolkit also provides a command line interface (CLI) ``` bash NBIAToolkit-Output > NBIAToolkit --version - _ ______ _______ ______ ____ _ __ + _ ______ _______ ______ ____ _ __ / | / / __ )/ _/ |/_ __/___ ____ / / /__(_) /_ / |/ / __ |/ // /| | / / / __ \/ __ \/ / //_/ / __/ - / /| / /_/ // // ___ |/ / / /_/ / /_/ / / ,< / / /_ -/_/ |_/_____/___/_/ |_/_/ \____/\____/_/_/|_/_/\__/ - + / /| / /_/ // // ___ |/ / / /_/ / /_/ / / ,< / / /_ +/_/ |_/_____/___/_/ |_/_/ \____/\____/_/_/|_/_/\__/ + Version: 0.32.0 -Available CLI tools: +Available CLI tools: getCollections [-h] [-u USERNAME] [-pw PASSWORD] [-p PREFIX] [-o OUTPUTFILE] [--version] diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py index 6cc6a5f..80c708e 100644 --- a/src/nbiatoolkit/nbia.py +++ b/src/nbiatoolkit/nbia.py @@ -1,10 +1,6 @@ from calendar import c from inspect import getmodule from re import I -import re -import aiohttp -import asyncio -import os import zipfile from tempfile import TemporaryDirectory from .dicomsort import DICOMSorter @@ -29,7 +25,6 @@ import io import zipfile -import os from datetime import datetime # set __version__ variable @@ -38,18 +33,67 @@ # function that takes a list of dictionaries and returns either a list or a dataframe def conv_response_list( - response_json: List[dict[Any, Any]], return_type: ReturnType = ReturnType.LIST + response_json: List[dict[Any, Any]], + return_type: ReturnType, ) -> List[dict[Any, Any]] | pd.DataFrame: assert isinstance(response_json, list), "The response JSON must be a list" if return_type == ReturnType.LIST: return response_json elif return_type == ReturnType.DATAFRAME: - import pandas as pd - return pd.DataFrame(data=response_json) +def downloadSingleSeries( + SeriesInstanceUID: str, + downloadDir: str, + filePattern: str, + overwrite: bool, + api_headers: dict[str, str], + base_url: NBIA_ENDPOINTS, + log: Logger, +): + + # create query_url + query_url: str = base_url.value + NBIA_ENDPOINTS.DOWNLOAD_SERIES.value + + params = dict() + params["SeriesInstanceUID"] = SeriesInstanceUID + + # create a temporary directory + + with TemporaryDirectory() as tempDir: + log.debug(f"Downloading series: {SeriesInstanceUID}") + response = requests.get(url=query_url, headers=api_headers, params=params) + + file = zipfile.ZipFile(io.BytesIO(response.content)) + file.extractall(path=tempDir) + + try: + validateMD5(seriesDir=tempDir) + except Exception as e: + log.error(f"Error validating MD5 hash: {e}") + return False + + # Create an instance of DICOMSorter with the desired target pattern + sorter = DICOMSorter( + sourceDir=tempDir, + destinationDir=downloadDir, + targetPattern=filePattern, + truncateUID=True, + sanitizeFilename=True, + ) + # sorter.sortDICOMFiles(option="move", overwrite=overwrite) + if not sorter.sortDICOMFiles( + shutil_option="move", overwrite=overwrite, progressbar=False, n_parallel=1 + ): + log.error( + f"Error sorting DICOM files for series {SeriesInstanceUID}\n \ + failed files located at {tempDir}" + ) + return False + + class NBIAClient: """ The NBIAClient class is a wrapper around the NBIA REST API. It provides @@ -152,14 +196,6 @@ def query_api( parsed_response: List[dict[Any, Any]] | bytes = parse_response( response=response ) - - except JSONDecodeError as j: - self._log.error("Error parsing response as JSON: %s", j) - if response is not None: - self._log.debug("Response: %s", response.text) - if not response.text.strip(): - self._log.error("Response text is empty.") - raise j except requests.exceptions.HTTPError as http_err: self._log.error("HTTP error occurred: %s", http_err) if response is None: @@ -478,56 +514,3 @@ def parsePARAMS(self, params: dict) -> dict: if (value != "") and (key != "self") and (key != "return_type"): PARAMS[key] = value return PARAMS - - -# Use asyncio to make the request - - -def downloadSingleSeries( - SeriesInstanceUID: str, - downloadDir: str, - filePattern: str, - overwrite: bool, - api_headers: dict[str, str], - base_url: NBIA_ENDPOINTS, - log: Logger, -): - - # create query_url - query_url: str = base_url.value + NBIA_ENDPOINTS.DOWNLOAD_SERIES.value - - params = dict() - params["SeriesInstanceUID"] = SeriesInstanceUID - - # create a temporary directory - - with TemporaryDirectory() as tempDir: - log.debug(f"Downloading series: {SeriesInstanceUID}") - response = requests.get(url=query_url, headers=api_headers, params=params) - - file = zipfile.ZipFile(io.BytesIO(response.content)) - file.extractall(path=tempDir) - - try: - validateMD5(seriesDir=tempDir) - except Exception as e: - log.error(f"Error validating MD5 hash: {e}") - return False - - # Create an instance of DICOMSorter with the desired target pattern - sorter = DICOMSorter( - sourceDir=tempDir, - destinationDir=downloadDir, - targetPattern=filePattern, - truncateUID=True, - sanitizeFilename=True, - ) - # sorter.sortDICOMFiles(option="move", overwrite=overwrite) - if not sorter.sortDICOMFiles( - shutil_option="move", overwrite=overwrite, progressbar=False, n_parallel=1 - ): - log.error( - f"Error sorting DICOM files for series {SeriesInstanceUID}\n \ - failed files located at {tempDir}" - ) - return False