Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cache migration to settings container #418

Merged
merged 6 commits into from
Nov 26, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .bandit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
tests: []

# (optional) list skipped tests here:
skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506']
skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506', 'B321', 'B402']
7 changes: 4 additions & 3 deletions sfaira/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""A Data and Model Zoo for Single-Cell Genomics."""

from ._settings import settings
import sfaira.consts
import sfaira.data
import sfaira.genomes
Expand All @@ -22,7 +23,7 @@
"Lukas Heumos"
])
__email__ = ', '.join([
"leander.dony@helmholtz-muenchen.de",
"david.fischer@helmholtz-muenchen.de",
"lukas.heumos@helmholtz-muenchen.de"
"leander.dony@helmholtz-munich.de",
"david.fischer@helmholtz-munich.de",
"lukas.heumos@helmholtz-munich.de"
])
77 changes: 77 additions & 0 deletions sfaira/_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Settings class which for example holds paths to cache directories used throughout the code.
"""

import os


SFAIRA_REPO_URL = "https://zenodo.org/record/4836517/files/"


class SfairaConfig:
"""\
Config manager for sfaira.
"""

def __init__(self):
self.sfaira_repo_url = SFAIRA_REPO_URL
self._cachedir_base = os.path.join(os.path.expanduser("~"), ".cache", "sfaira")
self._cachedir_databases = os.path.join(self._cachedir_base, "dataset_meta")
self._cachedir_databases_cellxgene = os.path.join(self._cachedir_databases, "cellxgene")
self._cachedir_genomes = os.path.join(self._cachedir_base, "genomes")
self._cachedir_ontologies = os.path.join(self._cachedir_base, "ontologies")

@property
def cachedir_base(self) -> str:
os.makedirs(self._cachedir_base, exist_ok=True)
return self._cachedir_base

@cachedir_base.setter
def cachedir_base(self, cachedir_base):
if not isinstance(cachedir_base, str):
raise ValueError(f"cachedir_base needs to be provided as a string, was {type(cachedir_base)}")
le-ander marked this conversation as resolved.
Show resolved Hide resolved
if cachedir_base == "repo":
cachedir_base = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "cache")
self._cachedir_base = cachedir_base

@property
def cachedir_databases(self) -> str:
os.makedirs(self._cachedir_databases, exist_ok=True)
return self._cachedir_databases

@cachedir_databases.setter
def cachedir_databases(self, cachedir_databases):
raise ValueError("cachedir_databases cannot be set manually as it is defined as a subdirectory of"
" cachedir_base. please modify cachedir_base instead")

@property
def cachedir_databases_cellxgene(self) -> str:
os.makedirs(self._cachedir_databases_cellxgene, exist_ok=True)
return self._cachedir_databases_cellxgene

@cachedir_databases_cellxgene.setter
def cachedir_databases_cellxgene(self, cachedir_databases_cellxgene):
raise ValueError("cachedir_databases_cellxgene cannot be set manually as it is defined as a subdirectory"
" of cachedir_base. please modify cachedir_base instead")

@property
def cachedir_genomes(self) -> str:
os.makedirs(self._cachedir_genomes, exist_ok=True)
return self._cachedir_genomes

@cachedir_genomes.setter
def cachedir_genomes(self, cachedir_genomes):
raise ValueError("cachedir_genomes cannot be set manually as it is defined as a subdirectory of cachedir_base."
"please modify cachedir_base instead")

@property
def cachedir_ontologies(self) -> str:
os.makedirs(self._cachedir_ontologies, exist_ok=True)
return self._cachedir_ontologies

@cachedir_ontologies.setter
def cachedir_ontologies(self, cachedir_ontologies):
raise ValueError("cachedir_ontologies cannot be set manually as it is defined as a subdirectory of cachedir_base. please modify cachedir_base instead")


settings = SfairaConfig()
1 change: 0 additions & 1 deletion sfaira/consts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from sfaira.consts.adata_fields import AdataIds, AdataIdsSfaira, AdataIdsCellxgene, AdataIdsCellxgene_v2_0_0
from sfaira.consts.directories import CACHE_DIR, SFAIRA_REPO_URL
from sfaira.consts.meta_data_files import META_DATA_FIELDS
from sfaira.consts.ontologies import OntologyContainerSfaira, OTHER_ORGANISM_KEY
from sfaira.consts.utils import clean_cache
Expand Down
16 changes: 0 additions & 16 deletions sfaira/consts/directories.py

This file was deleted.

10 changes: 5 additions & 5 deletions sfaira/consts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import shutil
from typing import Union

from sfaira.consts.directories import CACHE_DIR, CACHE_DIR_DATABASES, CACHE_DIR_GENOMES, CACHE_DIR_ONTOLOGIES
from sfaira import settings


def clean_cache(cache: Union[None, str] = None):
Expand All @@ -13,10 +13,10 @@ def clean_cache(cache: Union[None, str] = None):
"""
if cache is not None:
cache_dir_dict = {
"all": CACHE_DIR,
"dataset_meta": CACHE_DIR_DATABASES,
"genomes": CACHE_DIR_GENOMES,
"ontologies": CACHE_DIR_ONTOLOGIES,
"all": settings.cachedir_base,
"dataset_meta": settings.cachedir_databases,
"genomes": settings.cachedir_genomes,
"ontologies": settings.cachedir_ontologies,
}
if cache not in cache_dir_dict.keys():
raise ValueError(f"Did not find cache directory input {cache} in support list: "
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@
from typing import List, Union
import uuid

from sfaira import settings
from sfaira.data.dataloaders.base import DatasetBase
from sfaira.consts import AdataIdsCellxgene, AdataIdsCellxgene_v2_0_0
from sfaira.consts.directories import CACHE_DIR_DATABASES_CELLXGENE
from sfaira.data.dataloaders.databases.cellxgene.rest_helpers import get_collection, get_data
from sfaira.data.dataloaders.databases.cellxgene.rest_helpers import CELLXGENE_PRODUCTION_ENDPOINT, DOWNLOAD_DATASET

Expand Down Expand Up @@ -137,11 +137,9 @@ def __init__(
@property
def _collection_cache_dir(self):
"""
The cache dir is in a cache directory in the sfaira installation that is excempt from git versioning.
The cache dir is in a cache directory in the homedirectory of the user by default and can be user modified.
"""
cache_dir_path = pathlib.Path(CACHE_DIR_DATABASES_CELLXGENE)
cache_dir_path.mkdir(parents=True, exist_ok=True)
return CACHE_DIR_DATABASES_CELLXGENE
return settings.cachedir_databases_cellxgene

@property
def _collection_cache_fn(self):
Expand Down
5 changes: 3 additions & 2 deletions sfaira/ui/user_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
import warnings
import time

from sfaira.consts import AdataIdsSfaira, AdataIds, OCS, SFAIRA_REPO_URL
from sfaira import settings
from sfaira.consts import AdataIdsSfaira, AdataIds, OCS
from sfaira.data import DatasetInteractive
from sfaira.estimators import EstimatorKerasEmbedding, EstimatorKerasCelltype
from sfaira.ui.model_zoo import ModelZoo
Expand Down Expand Up @@ -61,7 +62,7 @@ def __init__(
self.adata_ids = AdataIdsSfaira()

if sfaira_repo: # check if public sfaira repository should be accessed
self.model_lookuptable = self._load_lookuptable(SFAIRA_REPO_URL)
self.model_lookuptable = self._load_lookuptable(settings.sfaira_repo_url)

if custom_repo:
if isinstance(custom_repo, str):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@
import pytest
from typing import Union

from sfaira.consts import AdataIdsSfaira, CACHE_DIR
from sfaira import settings
from sfaira.consts import AdataIdsSfaira
from sfaira.data import DistributedStoreSingleFeatureSpace, DistributedStoreMultipleFeatureSpaceBase, load_store
from sfaira.estimators import EstimatorKeras, EstimatorKerasCelltype, EstimatorKerasEmbedding
from sfaira.versions.genomes.genomes import CustomFeatureContainer
Expand All @@ -17,8 +18,6 @@
from sfaira.unit_tests.data_for_tests.loaders.utils import PrepareData
from sfaira.unit_tests.directories import DIR_TEMP

CACHE_DIR_GENOMES = os.path.join(CACHE_DIR, "genomes")

ADATA_IDS = AdataIdsSfaira()
ASSEMBLY = {
"Homo sapiens": ASSEMBLY_HUMAN,
Expand Down Expand Up @@ -177,7 +176,7 @@ def init_topology(self, model_type: str, feature_space: str, organism: str):
if feature_space == "full":
# Read 500 genes (not full protein coding) to compromise between being able to distinguish observations
# and reducing run time of unit tests.
tab = pd.read_csv(os.path.join(CACHE_DIR_GENOMES, "_".join(organism.split(" ")).lower(),
tab = pd.read_csv(os.path.join(settings.cachedir_genomes, "_".join(organism.split(" ")).lower(),
ASSEMBLY[organism] + ".csv"))
genes_full = tab.loc[tab["gene_biotype"].values == "protein_coding", "gene_id"].values[:500].tolist()
topology["input"]["genes"] = ["ensg", genes_full]
Expand Down
8 changes: 6 additions & 2 deletions sfaira/unit_tests/tests_by_submodule/ui/test_userinterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
import pandas as pd
import urllib.request

from sfaira import settings
from sfaira.ui import UserInterface
from sfaira.unit_tests.data_for_tests.loaders.utils import PrepareData
from sfaira.unit_tests import DIR_TEMP
from sfaira.consts import SFAIRA_REPO_URL


class HelperUi:
Expand All @@ -29,7 +29,11 @@ def prepare_local_tempfiles(self):
if not os.path.exists(self.temp_fn):
os.makedirs(self.temp_fn)
# download an example weight from sfaira repo
lookuptable = pd.read_csv(os.path.join(SFAIRA_REPO_URL, 'model_lookuptable.csv'), header=0, index_col=0)
lookuptable = pd.read_csv(
os.path.join(settings.sfaira_repo_url, 'model_lookuptable.csv'),
header=0,
index_col=0
)
url = lookuptable.loc[0, "model_file_path"]
if os.path.basename(url) not in os.listdir(self.temp_fn):
urllib.request.urlretrieve(url, os.path.join(self.temp_fn, os.path.basename(url)))
Expand Down
6 changes: 3 additions & 3 deletions sfaira/versions/genomes/genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import urllib.error
import urllib.request

from sfaira.consts.directories import CACHE_DIR_GENOMES
from sfaira import settings

KEY_SYMBOL = "gene_name"
KEY_ID = "gene_id"
Expand All @@ -37,9 +37,9 @@ def __init__(self, release: str, organism: str):
@property
def cache_dir(self):
"""
The cache dir is in a cache directory in the sfaira installation that is excempt from git versioning.
The cache dir is in a cache directory in the homedirectory of the user by default and can be user modified.
"""
cache_dir_path = os.path.join(CACHE_DIR_GENOMES, self.ensembl_organism)
cache_dir_path = os.path.join(settings.cachedir_genomes, self.ensembl_organism)
cache_dir_path = pathlib.Path(cache_dir_path)
cache_dir_path.mkdir(parents=True, exist_ok=True)
return cache_dir_path
Expand Down
6 changes: 3 additions & 3 deletions sfaira/versions/metadata/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from typing import Dict, List, Tuple, Union


from sfaira.consts.directories import CACHE_DIR_ONTOLOGIES
from sfaira import settings

"""
Ontology managament classes.
Expand All @@ -33,7 +33,7 @@ def cached_load_file(url, ontology_cache_dir, ontology_cache_fn, recache: bool =
# TODO add caching option.
obofile = url
else:
ontology_cache_dir = os.path.join(CACHE_DIR_ONTOLOGIES, ontology_cache_dir)
ontology_cache_dir = os.path.join(settings.cachedir_ontologies, ontology_cache_dir)
obofile = os.path.join(ontology_cache_dir, ontology_cache_fn)
# Download if necessary:
if not os.path.isfile(obofile) or recache:
Expand Down Expand Up @@ -62,7 +62,7 @@ def cached_load_ebi(ontology_cache_dir, ontology_cache_fn, recache: bool = False
:param recache:
:return:
"""
ontology_cache_dir = os.path.join(CACHE_DIR_ONTOLOGIES, ontology_cache_dir)
ontology_cache_dir = os.path.join(settings.cachedir_ontologies, ontology_cache_dir)
picklefile = os.path.join(ontology_cache_dir, ontology_cache_fn)
if os.path.isfile(picklefile) and not recache:
with open(picklefile, 'rb') as f:
Expand Down