theislab · le-ander · Nov 26, 2021 · Nov 26, 2021 · Nov 26, 2021 · Nov 26, 2021
diff --git a/.bandit.yml b/.bandit.yml
@@ -4,4 +4,4 @@
 tests: []
 
 # (optional) list skipped tests here:
-skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506']
+skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506', 'B321', 'B402']
diff --git a/sfaira/__init__.py b/sfaira/__init__.py
@@ -1,6 +1,7 @@
 # -*- coding: utf-8 -*-
 """A Data and Model Zoo for Single-Cell Genomics."""
 
+from ._settings import settings
 import sfaira.consts
 import sfaira.data
 import sfaira.genomes
@@ -22,7 +23,7 @@
     "Lukas Heumos"
 ])
 __email__ = ', '.join([
-    "leander.dony@helmholtz-muenchen.de",
-    "david.fischer@helmholtz-muenchen.de",
-    "lukas.heumos@helmholtz-muenchen.de"
+    "leander.dony@helmholtz-munich.de",
+    "david.fischer@helmholtz-munich.de",
+    "lukas.heumos@helmholtz-munich.de"
 ])
diff --git a/sfaira/_settings.py b/sfaira/_settings.py
@@ -0,0 +1,101 @@
+"""
+Settings class which, for example, holds paths to cache directories used throughout the code.
+"""
+
+import os
+
+
+SFAIRA_REPO_URL = "https://zenodo.org/record/4836517/files/"
+
+
+class SfairaConfig:
+    """\
+    Config manager for sfaira.
+
+    Holds the URL of the public sfaira model repository and the locations of the cache directories. Only
+    ``cachedir_base`` can be set by the user; all other cache directories are sub-directories of it and follow it
+    when it changes. Reading any ``cachedir_*`` property creates the directory if it does not exist yet.
+    """
+
+    def __init__(self):
+        self.sfaira_repo_url = SFAIRA_REPO_URL
+        self._set_cachedirs(os.path.join(os.path.expanduser("~"), ".cache", "sfaira"))
+
+    def _set_cachedirs(self, cachedir_base: str):
+        """Store the base cache directory and re-derive all sub-directories from it."""
+        self._cachedir_base = cachedir_base
+        self._cachedir_databases = os.path.join(self._cachedir_base, "dataset_meta")
+        self._cachedir_databases_cellxgene = os.path.join(self._cachedir_databases, "cellxgene")
+        self._cachedir_genomes = os.path.join(self._cachedir_base, "genomes")
+        self._cachedir_ontologies = os.path.join(self._cachedir_base, "ontologies")
+
+    @staticmethod
+    def _ensure_dir(path: str) -> str:
+        """Create ``path`` (including parents) if necessary and return it."""
+        os.makedirs(path, exist_ok=True)
+        return path
+
+    @property
+    def cachedir_base(self) -> str:
+        """Root cache directory; defaults to ``~/.cache/sfaira``."""
+        return self._ensure_dir(self._cachedir_base)
+
+    @cachedir_base.setter
+    def cachedir_base(self, cachedir_base):
+        """
+        Set the root cache directory.
+
+        :param cachedir_base: Path of the new root cache directory, or "repo" to use a "cache" directory next to the
+            sfaira package directory (the repository root in a source checkout).
+        :raises ValueError: If ``cachedir_base`` is not a string.
+        """
+        if not isinstance(cachedir_base, str):
+            raise ValueError(f"cachedir_base needs to be provided as a string, was {type(cachedir_base)}")
+        if cachedir_base == "repo":
+            # This file is <repo>/sfaira/_settings.py, so the repository root is two levels up.
+            cachedir_base = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cache")
+        # Re-derive the sub-directories so that they follow the new base directory.
+        self._set_cachedirs(cachedir_base)
+
+    @property
+    def cachedir_databases(self) -> str:
+        """Cache directory for dataset meta data (``<cachedir_base>/dataset_meta``)."""
+        return self._ensure_dir(self._cachedir_databases)
+
+    @cachedir_databases.setter
+    def cachedir_databases(self, cachedir_databases):
+        raise ValueError("cachedir_databases cannot be set manually as it is defined as a subdirectory of"
+                         " cachedir_base. please modify cachedir_base instead")
+
+    @property
+    def cachedir_databases_cellxgene(self) -> str:
+        """Cache directory for cellxgene meta data (``<cachedir_base>/dataset_meta/cellxgene``)."""
+        return self._ensure_dir(self._cachedir_databases_cellxgene)
+
+    @cachedir_databases_cellxgene.setter
+    def cachedir_databases_cellxgene(self, cachedir_databases_cellxgene):
+        raise ValueError("cachedir_databases_cellxgene cannot be set manually as it is defined as a subdirectory"
+                         " of cachedir_base. please modify cachedir_base instead")
+
+    @property
+    def cachedir_genomes(self) -> str:
+        """Cache directory for genomes (``<cachedir_base>/genomes``)."""
+        return self._ensure_dir(self._cachedir_genomes)
+
+    @cachedir_genomes.setter
+    def cachedir_genomes(self, cachedir_genomes):
+        raise ValueError("cachedir_genomes cannot be set manually as it is defined as a subdirectory of"
+                         " cachedir_base. please modify cachedir_base instead")
+
+    @property
+    def cachedir_ontologies(self) -> str:
+        """Cache directory for ontologies (``<cachedir_base>/ontologies``)."""
+        return self._ensure_dir(self._cachedir_ontologies)
+
+    @cachedir_ontologies.setter
+    def cachedir_ontologies(self, cachedir_ontologies):
+        raise ValueError("cachedir_ontologies cannot be set manually as it is defined as a subdirectory of"
+                         " cachedir_base. please modify cachedir_base instead")
+
+
+settings = SfairaConfig()
diff --git a/sfaira/consts/__init__.py b/sfaira/consts/__init__.py
@@ -1,5 +1,4 @@
 from sfaira.consts.adata_fields import AdataIds, AdataIdsSfaira, AdataIdsCellxgene, AdataIdsCellxgene_v2_0_0
-from sfaira.consts.directories import CACHE_DIR, SFAIRA_REPO_URL
 from sfaira.consts.meta_data_files import META_DATA_FIELDS
 from sfaira.consts.ontologies import OntologyContainerSfaira, OTHER_ORGANISM_KEY
 from sfaira.consts.utils import clean_cache

diff --git a/sfaira/consts/directories.py b/sfaira/consts/directories.py
diff --git a/sfaira/consts/utils.py b/sfaira/consts/utils.py
@@ -2,7 +2,7 @@
 import shutil
 from typing import Union
 
-from sfaira.consts.directories import CACHE_DIR, CACHE_DIR_DATABASES, CACHE_DIR_GENOMES, CACHE_DIR_ONTOLOGIES
+from sfaira import settings
 
 
 def clean_cache(cache: Union[None, str] = None):
@@ -13,10 +13,10 @@ def clean_cache(cache: Union[None, str] = None):
     """
     if cache is not None:
         cache_dir_dict = {
-            "all": CACHE_DIR,
-            "dataset_meta": CACHE_DIR_DATABASES,
-            "genomes": CACHE_DIR_GENOMES,
-            "ontologies": CACHE_DIR_ONTOLOGIES,
+            "all": settings.cachedir_base,
+            "dataset_meta": settings.cachedir_databases,
+            "genomes": settings.cachedir_genomes,
+            "ontologies": settings.cachedir_ontologies,
         }
         if cache not in cache_dir_dict.keys():
             raise ValueError(f"Did not find cache directory input {cache} in support list: "

diff --git a/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py b/sfaira/data/dataloaders/databases/cellxgene/cellxgene_loader.py
@@ -8,9 +8,9 @@
 from typing import List, Union
 import uuid
 
+from sfaira import settings
 from sfaira.data.dataloaders.base import DatasetBase
 from sfaira.consts import AdataIdsCellxgene, AdataIdsCellxgene_v2_0_0
-from sfaira.consts.directories import CACHE_DIR_DATABASES_CELLXGENE
 from sfaira.data.dataloaders.databases.cellxgene.rest_helpers import get_collection, get_data
 from sfaira.data.dataloaders.databases.cellxgene.rest_helpers import CELLXGENE_PRODUCTION_ENDPOINT, DOWNLOAD_DATASET
 
@@ -137,11 +137,9 @@ def __init__(
     @property
     def _collection_cache_dir(self):
         """
-        The cache dir is in a cache directory in the sfaira installation that is excempt from git versioning.
+        The cache dir is located in the home directory of the user by default and can be modified by the user.
         """
-        cache_dir_path = pathlib.Path(CACHE_DIR_DATABASES_CELLXGENE)
-        cache_dir_path.mkdir(parents=True, exist_ok=True)
-        return CACHE_DIR_DATABASES_CELLXGENE
+        return settings.cachedir_databases_cellxgene
 
     @property
     def _collection_cache_fn(self):

diff --git a/sfaira/ui/user_interface.py b/sfaira/ui/user_interface.py
@@ -8,7 +8,8 @@
 import warnings
 import time
 
-from sfaira.consts import AdataIdsSfaira, AdataIds, OCS, SFAIRA_REPO_URL
+from sfaira import settings
+from sfaira.consts import AdataIdsSfaira, AdataIds, OCS
 from sfaira.data import DatasetInteractive
 from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype
 from sfaira.ui.model_zoo import ModelZoo
@@ -61,7 +62,7 @@ def __init__(
         self.adata_ids = AdataIdsSfaira()
 
         if sfaira_repo:  # check if public sfaira repository should be accessed
-            self.model_lookuptable = self._load_lookuptable(SFAIRA_REPO_URL)
+            self.model_lookuptable = self._load_lookuptable(settings.sfaira_repo_url)
 
         if custom_repo:
             if isinstance(custom_repo, str):

diff --git a/sfaira/unit_tests/tests_by_submodule/estimators/test_estimator.py b/sfaira/unit_tests/tests_by_submodule/estimators/test_estimator.py
@@ -6,7 +6,8 @@
 import pytest
 from typing import Union
 
-from sfaira.consts import AdataIdsSfaira, CACHE_DIR
+from sfaira import settings
+from sfaira.consts import AdataIdsSfaira
 from sfaira.data import DistributedStoreSingleFeatureSpace, DistributedStoreMultipleFeatureSpaceBase, load_store
 from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding
 from sfaira.versions.genomes.genomes import CustomFeatureContainer
@@ -17,8 +18,6 @@
 from sfaira.unit_tests.data_for_tests.loaders.utils import PrepareData
 from sfaira.unit_tests.directories import DIR_TEMP
 
-CACHE_DIR_GENOMES = os.path.join(CACHE_DIR, "genomes")
-
 ADATA_IDS = AdataIdsSfaira()
 ASSEMBLY = {
     "Homo sapiens": ASSEMBLY_HUMAN,
@@ -177,7 +176,7 @@ def init_topology(self, model_type: str, feature_space: str, organism: str):
         if feature_space == "full":
             # Read 500 genes (not full protein coding) to compromise between being able to distinguish observations
             # and reducing run time of unit tests.
-            tab = pd.read_csv(os.path.join(CACHE_DIR_GENOMES, "_".join(organism.split(" ")).lower(),
+            tab = pd.read_csv(os.path.join(settings.cachedir_genomes, "_".join(organism.split(" ")).lower(),
                                            ASSEMBLY[organism] + ".csv"))
             genes_full = tab.loc[tab["gene_biotype"].values == "protein_coding", "gene_id"].values[:500].tolist()
             topology["input"]["genes"] = ["ensg", genes_full]

diff --git a/sfaira/unit_tests/tests_by_submodule/ui/test_userinterface.py b/sfaira/unit_tests/tests_by_submodule/ui/test_userinterface.py
@@ -4,10 +4,10 @@
 import pandas as pd
 import urllib.request
 
+from sfaira import settings
 from sfaira.ui import UserInterface
 from sfaira.unit_tests.data_for_tests.loaders.utils import PrepareData
 from sfaira.unit_tests import DIR_TEMP
-from sfaira.consts import SFAIRA_REPO_URL
 
 
 class HelperUi:
@@ -29,7 +29,11 @@ def prepare_local_tempfiles(self):
         if not os.path.exists(self.temp_fn):
             os.makedirs(self.temp_fn)
         # download an example weight from sfaira repo
-        lookuptable = pd.read_csv(os.path.join(SFAIRA_REPO_URL, 'model_lookuptable.csv'), header=0, index_col=0)
+        lookuptable = pd.read_csv(
+            os.path.join(settings.sfaira_repo_url, 'model_lookuptable.csv'),
+            header=0,
+            index_col=0
+        )
         url = lookuptable.loc[0, "model_file_path"]
         if os.path.basename(url) not in os.listdir(self.temp_fn):
             urllib.request.urlretrieve(url, os.path.join(self.temp_fn, os.path.basename(url)))

diff --git a/sfaira/versions/genomes/genomes.py b/sfaira/versions/genomes/genomes.py
@@ -12,7 +12,7 @@
 import urllib.error
 import urllib.request
 
-from sfaira.consts.directories import CACHE_DIR_GENOMES
+from sfaira import settings
 
 KEY_SYMBOL = "gene_name"
 KEY_ID = "gene_id"
@@ -37,9 +37,9 @@ def __init__(self, release: str, organism: str):
     @property
     def cache_dir(self):
         """
-        The cache dir is in a cache directory in the sfaira installation that is excempt from git versioning.
+        The cache dir is located in the home directory of the user by default and can be modified by the user.
         """
-        cache_dir_path = os.path.join(CACHE_DIR_GENOMES, self.ensembl_organism)
+        cache_dir_path = os.path.join(settings.cachedir_genomes, self.ensembl_organism)
         cache_dir_path = pathlib.Path(cache_dir_path)
         cache_dir_path.mkdir(parents=True, exist_ok=True)
         return cache_dir_path

diff --git a/sfaira/versions/metadata/base.py b/sfaira/versions/metadata/base.py
@@ -9,7 +9,7 @@
 from typing import Dict, List, Tuple, Union
 
 
-from sfaira.consts.directories import CACHE_DIR_ONTOLOGIES
+from sfaira import settings
 
 """
 Ontology managament classes.
@@ -33,7 +33,7 @@ def cached_load_file(url, ontology_cache_dir, ontology_cache_fn, recache: bool =
         # TODO add caching option.
         obofile = url
     else:
-        ontology_cache_dir = os.path.join(CACHE_DIR_ONTOLOGIES, ontology_cache_dir)
+        ontology_cache_dir = os.path.join(settings.cachedir_ontologies, ontology_cache_dir)
         obofile = os.path.join(ontology_cache_dir, ontology_cache_fn)
         # Download if necessary:
         if not os.path.isfile(obofile) or recache:
@@ -62,7 +62,7 @@ def cached_load_ebi(ontology_cache_dir, ontology_cache_fn, recache: bool = False
     :param recache:
     :return:
     """
-    ontology_cache_dir = os.path.join(CACHE_DIR_ONTOLOGIES, ontology_cache_dir)
+    ontology_cache_dir = os.path.join(settings.cachedir_ontologies, ontology_cache_dir)
     picklefile = os.path.join(ontology_cache_dir, ontology_cache_fn)
     if os.path.isfile(picklefile) and not recache:
         with open(picklefile, 'rb') as f: