refactor: improve dockerfile by removing caches and comment blocks

jjjermiah · Feb 19, 2024 · b8ed8b0 · b8ed8b0
1 parent 4f79f98
commit b8ed8b0
Show file tree

Hide file tree

Showing 3 changed files with 90 additions and 88 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,11 +1,3 @@
-# This Dockerfile will create a container that builds the nbiatoolkit package
-# using the code in this repository
-#
-# To build the container, run the following command from the root of the
-# repository:
-# docker build -t nbiatoolkit .
-
-
 FROM python:3.12-slim
 
 LABEL maintainer="Jermiah Joseph jermiahjoseph98@gmail.com"
@@ -21,13 +13,10 @@ COPY . /nbiatoolkit
 WORKDIR /nbiatoolkit
 
 # install nbiatoolkit
-RUN pip install --upgrade pip
-RUN pip install .
+RUN pip install --no-cache-dir --upgrade pip
+RUN pip install --no-cache-dir .
 
 RUN NBIAToolkit --help
 
 # On run, open a bash shell
 CMD ["/bin/bash"]
-
-# to run this container in terminal mode, use the following command:
-# docker run -it --rm nbiatoolkit
diff --git a/README.md b/README.md
@@ -3,18 +3,24 @@
 [![PyTests](https://github.com/jjjermiah/nbia-toolkit/actions/workflows/main.yml/badge.svg)](https://github.com/jjjermiah/nbia-toolkit/actions/workflows/main.yml)
 [![Documentation Status](https://readthedocs.org/projects/nbia-toolkit/badge/?version=latest)](https://nbia-toolkit.readthedocs.io/en/latest/?badge=latest)
 [![codecov](https://codecov.io/gh/jjjermiah/nbia-toolkit/graph/badge.svg?token=JKREY71D0R)](https://codecov.io/gh/jjjermiah/nbia-toolkit)
+[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
 [![Python version](https://img.shields.io/pypi/pyversions/nbiatoolkit.svg)](https://img.shields.io/pypi/pyversions/nbiatoolkit.svg)
+[![CodeFactor](https://www.codefactor.io/repository/github/jjjermiah/nbia-toolkit/badge)](https://www.codefactor.io/repository/github/jjjermiah/nbia-toolkit)
+
 
+![GitHub release (latest by date)](https://img.shields.io/github/v/release/jjjermiah/nbia-toolkit)
 [![PyPI version](https://badge.fury.io/py/nbiatoolkit.svg)](https://badge.fury.io/py/nbiatoolkit)
 [![Downloads](https://static.pepy.tech/badge/nbiatoolkit)](https://pepy.tech/project/nbiatoolkit)
 [![PyPI - Downloads](https://img.shields.io/pypi/dm/nbiatoolkit.svg?label=pypi%20downloads)](https://pypi.org/project/nbiatoolkit/)
 ![GitHub repo size](https://img.shields.io/github/repo-size/jjjermiah/nbia-toolkit)
 [![Docker Pulls](https://img.shields.io/docker/pulls/jjjermiah/nbiatoolkit)](https://hub.docker.com/r/jjjermiah/nbiatoolkit)
-[![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
+
 
 
 
 ![GitHub milestone details](https://img.shields.io/github/milestones/progress-percent/jjjermiah/nbia-toolkit/1?style=flat-square&label=1.0.0%20Stable%20Release%20Milestone&link=https%3A%2F%2Fgit.luolix.top%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1)![GitHub milestone details](https://img.shields.io/github/milestones/progress/jjjermiah/nbia-toolkit/1?style=flat-square&label=%20&link=https%3A%2F%2Fgit.luolix.top%2Fjjjermiah%2Fnbia-toolkit%2Fmilestone%2F1)
+[![GitHub issues](https://img.shields.io/github/issues/jjjermiah/nbia-toolkit)](https://github.com/jjjermiah/nbia-toolkit/issues)
+![GitHub last commit](https://img.shields.io/github/last-commit/jjjermiah/nbia-toolkit)
 
 
 
@@ -44,21 +50,45 @@ It is made available via PyPI and can be installed using pip:
 pip install nbiatoolkit
 ```
 
+## Python Usage
+Using a context manager, you can easily access the NBIA database and query for metadata on collections, patients, studies, and series.
+
+``` python
+from nbiatoolkit import NBIAClient
+
+with NBIAClient() as client:
+    # Get a list of collections
+    collections = client.getCollections()
+    print(collections)
+
+    # Get a list of patients in a collection
+    patients = client.getPatients(Collection="TCGA-KIRC")
+    print(patients)
+
+    # Get a list of studies for a patient
+    studies = client.getStudies(PatientID="TCGA-BP-4989")
+    print(studies)
+
+    # Get a list of series for a study
+    series = client.getSeries(StudyInstanceUID=studies[0]["StudyInstanceUID"])
+    print(series[0:5])
+```
+
 ## CLI Usage
 For quick access to the NBIA, the toolkit also provides a command line interface (CLI).
 
 ``` bash NBIAToolkit-Output
 > NBIAToolkit --version
-    _   ______  _______  ______            ____   _ __ 
+    _   ______  _______  ______            ____   _ __
    / | / / __ )/  _/   |/_  __/___  ____  / / /__(_) /_
   /  |/ / __  |/ // /| | / / / __ \/ __ \/ / //_/ / __/
- / /|  / /_/ // // ___ |/ / / /_/ / /_/ / / ,< / / /_  
-/_/ |_/_____/___/_/  |_/_/  \____/\____/_/_/|_/_/\__/  
-                                                       
+ / /|  / /_/ // // ___ |/ / / /_/ / /_/ / / ,< / / /_
+/_/ |_/_____/___/_/  |_/_/  \____/\____/_/_/|_/_/\__/
+
 
 Version: 0.32.0
 
-Available CLI tools: 
+Available CLI tools:
 
 getCollections [-h] [-u USERNAME] [-pw PASSWORD] [-p PREFIX]
                [-o OUTPUTFILE] [--version]

diff --git a/src/nbiatoolkit/nbia.py b/src/nbiatoolkit/nbia.py
@@ -1,10 +1,6 @@
 from calendar import c
 from inspect import getmodule
 from re import I
-import re
-import aiohttp
-import asyncio
-import os
 import zipfile
 from tempfile import TemporaryDirectory
 from .dicomsort import DICOMSorter
@@ -29,7 +25,6 @@
 import io
 import zipfile
 
-import os
 from datetime import datetime
 
 # set __version__ variable
@@ -38,18 +33,67 @@
 
 # function that takes a list of dictionaries and returns either a list or a dataframe
 def conv_response_list(
-    response_json: List[dict[Any, Any]], return_type: ReturnType = ReturnType.LIST
+    response_json: List[dict[Any, Any]],
+    return_type: ReturnType,
 ) -> List[dict[Any, Any]] | pd.DataFrame:
     assert isinstance(response_json, list), "The response JSON must be a list"
 
     if return_type == ReturnType.LIST:
         return response_json
     elif return_type == ReturnType.DATAFRAME:
-        import pandas as pd
-
         return pd.DataFrame(data=response_json)
 
 
+def downloadSingleSeries(
+    SeriesInstanceUID: str,
+    downloadDir: str,
+    filePattern: str,
+    overwrite: bool,
+    api_headers: dict[str, str],
+    base_url: NBIA_ENDPOINTS,
+    log: Logger,
+) -> bool:
+    """Download a single series, validate it, and sort its files into ``downloadDir``.
+
+    The series is requested from ``base_url`` + ``NBIA_ENDPOINTS.DOWNLOAD_SERIES``,
+    the response body is unpacked as a zip archive into a temporary directory,
+    ``validateMD5`` is run on that directory, and a ``DICOMSorter`` then moves
+    the files into ``downloadDir`` using ``filePattern``.
+
+    Parameters
+    ----------
+    SeriesInstanceUID : str
+        Sent as the ``SeriesInstanceUID`` query parameter.
+    downloadDir : str
+        Destination directory handed to ``DICOMSorter``.
+    filePattern : str
+        Target pattern handed to ``DICOMSorter``.
+    overwrite : bool
+        Forwarded to ``DICOMSorter.sortDICOMFiles``.
+    api_headers : dict[str, str]
+        Headers sent with the download request.
+    base_url : NBIA_ENDPOINTS
+        Enum member whose ``value`` is the base URL.
+    log : Logger
+        Logger used for debug and error messages.
+
+    Returns
+    -------
+    bool
+        ``True`` when validation and sorting both succeed; ``False`` when
+        ``validateMD5`` raises or ``sortDICOMFiles`` returns a falsy value.
+
+    Notes
+    -----
+    Errors from the HTTP request or from opening the response as a zip
+    archive are not caught here and propagate to the caller.
+    """
+    # create query_url
+    query_url: str = base_url.value + NBIA_ENDPOINTS.DOWNLOAD_SERIES.value
+    params = {"SeriesInstanceUID": SeriesInstanceUID}
+
+    # Unpack into a temporary directory; it is removed when the block exits.
+    with TemporaryDirectory() as tempDir:
+        log.debug(f"Downloading series: {SeriesInstanceUID}")
+        response = requests.get(url=query_url, headers=api_headers, params=params)
+
+        # Close the archive handle as soon as extraction is done.
+        with zipfile.ZipFile(io.BytesIO(response.content)) as archive:
+            archive.extractall(path=tempDir)
+
+        try:
+            validateMD5(seriesDir=tempDir)
+        except Exception as e:
+            log.error(f"Error validating MD5 hash: {e}")
+            return False
+
+        # Create an instance of DICOMSorter with the desired target pattern
+        sorter = DICOMSorter(
+            sourceDir=tempDir,
+            destinationDir=downloadDir,
+            targetPattern=filePattern,
+            truncateUID=True,
+            sanitizeFilename=True,
+        )
+        if not sorter.sortDICOMFiles(
+            shutil_option="move", overwrite=overwrite, progressbar=False, n_parallel=1
+        ):
+            # Adjacent f-strings avoid embedding source indentation in the message.
+            # NOTE(review): tempDir is deleted once this block exits, so the
+            # logged path will no longer exist after the function returns.
+            log.error(
+                f"Error sorting DICOM files for series {SeriesInstanceUID}\n"
+                f"failed files located at {tempDir}"
+            )
+            return False
+
+    # Explicit success value so callers can tell success from the False failure paths.
+    return True
+
+
 class NBIAClient:
     """
     The NBIAClient class is a wrapper around the NBIA REST API. It provides
@@ -152,14 +196,6 @@ def query_api(
             parsed_response: List[dict[Any, Any]] | bytes = parse_response(
                 response=response
             )
-
-        except JSONDecodeError as j:
-            self._log.error("Error parsing response as JSON: %s", j)
-            if response is not None:
-                self._log.debug("Response: %s", response.text)
-                if not response.text.strip():
-                    self._log.error("Response text is empty.")
-            raise j
         except requests.exceptions.HTTPError as http_err:
             self._log.error("HTTP error occurred: %s", http_err)
             if response is None:
@@ -478,56 +514,3 @@ def parsePARAMS(self, params: dict) -> dict:
             if (value != "") and (key != "self") and (key != "return_type"):
                 PARAMS[key] = value
         return PARAMS
-
-
-# Use asyncio to make the request
-
-
-def downloadSingleSeries(
-    SeriesInstanceUID: str,
-    downloadDir: str,
-    filePattern: str,
-    overwrite: bool,
-    api_headers: dict[str, str],
-    base_url: NBIA_ENDPOINTS,
-    log: Logger,
-):
-
-    # create query_url
-    query_url: str = base_url.value + NBIA_ENDPOINTS.DOWNLOAD_SERIES.value
-
-    params = dict()
-    params["SeriesInstanceUID"] = SeriesInstanceUID
-
-    # create a temporary directory
-
-    with TemporaryDirectory() as tempDir:
-        log.debug(f"Downloading series: {SeriesInstanceUID}")
-        response = requests.get(url=query_url, headers=api_headers, params=params)
-
-        file = zipfile.ZipFile(io.BytesIO(response.content))
-        file.extractall(path=tempDir)
-
-        try:
-            validateMD5(seriesDir=tempDir)
-        except Exception as e:
-            log.error(f"Error validating MD5 hash: {e}")
-            return False
-
-        # Create an instance of DICOMSorter with the desired target pattern
-        sorter = DICOMSorter(
-            sourceDir=tempDir,
-            destinationDir=downloadDir,
-            targetPattern=filePattern,
-            truncateUID=True,
-            sanitizeFilename=True,
-        )
-        # sorter.sortDICOMFiles(option="move", overwrite=overwrite)
-        if not sorter.sortDICOMFiles(
-            shutil_option="move", overwrite=overwrite, progressbar=False, n_parallel=1
-        ):
-            log.error(
-                f"Error sorting DICOM files for series {SeriesInstanceUID}\n \
-                    failed files located at {tempDir}"
-            )
-            return False