Skip to content

Commit

Permalink
Merge pull request #421 from theislab/release
Browse files Browse the repository at this point in the history
v0.3.8
  • Loading branch information
le-ander authored Nov 26, 2021
2 parents 77a7b49 + bafe80c commit 9ee40b1
Show file tree
Hide file tree
Showing 133 changed files with 1,379 additions and 1,050 deletions.
2 changes: 1 addition & 1 deletion .bandit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
tests: []

# (optional) list skipped tests here:
skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506']
skips: ['B101', 'B403', 'B404', 'B603', 'B607', 'B301', 'B303', 'B311', 'B310', 'B506', 'B321', 'B402']
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
anndata>=0.7.6
crossref_commons
cellxgene-schema>=2.0.3
dask
docutils
fuzzywuzzy
Expand All @@ -21,7 +20,7 @@ PyYAML
scanpy>=1.7.0
scipy>=1.2.1
seaborn
tensorflow>=2.0.0 # TODO remove as soon as # 70 is solved
tensorflow # TODO remove as soon as # 70 is solved
tqdm
requests
versioneer
Expand Down
7 changes: 4 additions & 3 deletions sfaira/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-
"""A Data and Model Zoo for Single-Cell Genomics."""

from ._settings import settings
import sfaira.consts
import sfaira.data
import sfaira.genomes
Expand All @@ -22,7 +23,7 @@
"Lukas Heumos"
])
__email__ = ', '.join([
"leander.dony@helmholtz-muenchen.de",
"david.fischer@helmholtz-muenchen.de",
"lukas.heumos@helmholtz-muenchen.de"
"leander.dony@helmholtz-munich.de",
"david.fischer@helmholtz-munich.de",
"lukas.heumos@helmholtz-munich.de"
])
77 changes: 77 additions & 0 deletions sfaira/_settings.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
"""
Settings class which holds, for example, the paths to the cache directories used throughout the code.
"""

import os


# Base URL of the Zenodo record; SfairaConfig exposes it as ``sfaira_repo_url``.
SFAIRA_REPO_URL = "https://zenodo.org/record/4836517/files/"


class SfairaConfig:
    """
    Config manager for sfaira.

    Holds the repository URL and the paths of the cache directories. Every cache sub-directory is derived from
    ``cachedir_base``: only ``cachedir_base`` can be assigned, and assigning it re-derives all sub-directory paths.
    Each ``cachedir_*`` getter creates its directory on access if it does not exist yet.
    """

    def __init__(self):
        self.sfaira_repo_url = SFAIRA_REPO_URL
        self._set_cachedir_base(os.path.join(os.path.expanduser("~"), ".cache", "sfaira"))

    def _set_cachedir_base(self, cachedir_base: str) -> None:
        """
        Store the base cache directory and re-derive every sub-directory path from it.

        Keeping this in one place guarantees that the sub-directories always follow the base directory,
        both at construction time and whenever ``cachedir_base`` is re-assigned.

        :param cachedir_base: Path of the base cache directory.
        """
        self._cachedir_base = cachedir_base
        self._cachedir_databases = os.path.join(cachedir_base, "dataset_meta")
        self._cachedir_databases_cellxgene = os.path.join(self._cachedir_databases, "cellxgene")
        self._cachedir_genomes = os.path.join(cachedir_base, "genomes")
        self._cachedir_ontologies = os.path.join(cachedir_base, "ontologies")

    @property
    def cachedir_base(self) -> str:
        """Path of the base cache directory; the directory is created on access if missing."""
        os.makedirs(self._cachedir_base, exist_ok=True)
        return self._cachedir_base

    @cachedir_base.setter
    def cachedir_base(self, cachedir_base):
        """
        Set the base cache directory and move all derived sub-directories along with it.

        :param cachedir_base: Path of the new base directory, or the keyword "repo" to use a "cache" directory
            located relative to this file.
        :raises ValueError: If cachedir_base is not a string.
        """
        if not isinstance(cachedir_base, str):
            raise ValueError(f"cachedir_base needs to be provided as a string, was {type(cachedir_base)}")
        if cachedir_base == "repo":
            # NOTE(review): three dirname() calls resolve to the directory two levels above the directory that
            # contains this file - confirm that this is the intended location of the repo-level cache.
            cachedir_base = os.path.join(os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "cache")
        # Re-derive the sub-directories as well; otherwise they would keep pointing at the previous base directory.
        self._set_cachedir_base(cachedir_base)

    @property
    def cachedir_databases(self) -> str:
        """Path of the "dataset_meta" cache directory; the directory is created on access if missing."""
        os.makedirs(self._cachedir_databases, exist_ok=True)
        return self._cachedir_databases

    @cachedir_databases.setter
    def cachedir_databases(self, cachedir_databases):
        """Always raises ValueError: this path is derived from cachedir_base."""
        raise ValueError("cachedir_databases cannot be set manually as it is defined as a subdirectory of"
                         " cachedir_base. please modify cachedir_base instead")

    @property
    def cachedir_databases_cellxgene(self) -> str:
        """Path of the "cellxgene" directory inside "dataset_meta"; created on access if missing."""
        os.makedirs(self._cachedir_databases_cellxgene, exist_ok=True)
        return self._cachedir_databases_cellxgene

    @cachedir_databases_cellxgene.setter
    def cachedir_databases_cellxgene(self, cachedir_databases_cellxgene):
        """Always raises ValueError: this path is derived from cachedir_base."""
        raise ValueError("cachedir_databases_cellxgene cannot be set manually as it is defined as a subdirectory"
                         " of cachedir_base. please modify cachedir_base instead")

    @property
    def cachedir_genomes(self) -> str:
        """Path of the "genomes" cache directory; the directory is created on access if missing."""
        os.makedirs(self._cachedir_genomes, exist_ok=True)
        return self._cachedir_genomes

    @cachedir_genomes.setter
    def cachedir_genomes(self, cachedir_genomes):
        """Always raises ValueError: this path is derived from cachedir_base."""
        raise ValueError("cachedir_genomes cannot be set manually as it is defined as a subdirectory of cachedir_base."
                         " please modify cachedir_base instead")

    @property
    def cachedir_ontologies(self) -> str:
        """Path of the "ontologies" cache directory; the directory is created on access if missing."""
        os.makedirs(self._cachedir_ontologies, exist_ok=True)
        return self._cachedir_ontologies

    @cachedir_ontologies.setter
    def cachedir_ontologies(self, cachedir_ontologies):
        """Always raises ValueError: this path is derived from cachedir_base."""
        raise ValueError("cachedir_ontologies cannot be set manually as it is defined as a subdirectory of"
                         " cachedir_base. please modify cachedir_base instead")


# Module-level config instance; sfaira/__init__.py imports it via ``from ._settings import settings``.
settings = SfairaConfig()
3 changes: 1 addition & 2 deletions sfaira/consts/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from sfaira.consts.adata_fields import AdataIds, AdataIdsSfaira, AdataIdsCellxgene, AdataIdsCellxgene_v2_0_0
from sfaira.consts.directories import CACHE_DIR, SFAIRA_REPO_URL
from sfaira.consts.meta_data_files import META_DATA_FIELDS
from sfaira.consts.ontologies import OntologyContainerSfaira
from sfaira.consts.ontologies import OntologyContainerSfaira, OTHER_ORGANISM_KEY
from sfaira.consts.utils import clean_cache

OCS = OntologyContainerSfaira()
8 changes: 2 additions & 6 deletions sfaira/consts/adata_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@ def __init__(self):
self.onto_original_suffix = "_original"

self.feature_kwargs = {
"match_to_reference": {
"human": "Homo_sapiens.GRCh38.104",
"mouse": "Mus_musculus.GRCm39.104"},
"match_to_release": "104",
"remove_gene_version": True,
"subset_genes_to_type": None}

Expand Down Expand Up @@ -305,6 +303,4 @@ class AdataIdsCellxgene_v2_0_0(AdataIdsCellxgene):

def __init__(self):
super(AdataIdsCellxgene_v2_0_0, self).__init__()
self.feature_kwargs["match_to_reference"] = {
"human": "Homo_sapiens.GRCh38.104",
"mouse": "Mus_musculus.GRCm39.104"}
self.feature_kwargs["match_to_release"] = "104"
16 changes: 0 additions & 16 deletions sfaira/consts/directories.py

This file was deleted.

58 changes: 40 additions & 18 deletions sfaira/consts/ontologies.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,16 @@

from sfaira.versions.metadata import OntologyList, OntologyCl
from sfaira.versions.metadata import OntologyCellosaurus, OntologyHancestro, OntologyHsapdv, OntologyMondo, \
OntologyMmusdv, OntologyEfo, OntologySex, OntologyUberon
OntologyMmusdv, OntologyEfo, OntologySex, OntologyTaxon, OntologyUberon, OntologyUberonLifecyclestage

OTHER_ORGANISM_KEY = "other"

DEFAULT_CL = "v2021-08-10"
DEFAULT_HSAPDV = "master"
DEFAULT_MONDO = "v2021-08-11"
DEFAULT_MMUSDV = "master"
DEFAULT_PATO = "v2021-08-06"
DEFAULT_NCBITAXON = "v2021-06-10"
DEFAULT_UBERON = "v2021-07-27"


Expand All @@ -26,9 +29,12 @@ class OntologyContainerSfaira:
_development_stage: Union[None, Dict[str, Union[OntologyHsapdv, OntologyMmusdv]]]
_ethnicity: Union[None, Dict[str, Union[OntologyHancestro, None]]]
_organ: Union[None, OntologyUberon]
_organism: Union[None, OntologyTaxon]
_sex: Union[None, OntologySex]

def __init__(self):
self.key_other = OTHER_ORGANISM_KEY

self.annotated = OntologyList(terms=[True, False])
self.author = None
self.assay_differentiation = None
Expand All @@ -50,7 +56,7 @@ def __init__(self):
self.individual = None
self.normalization = None
self._organ = None
self.organism = OntologyList(terms=["mouse", "human"]) # TODO introduce NCBItaxon here
self._organism = None
self.primary_data = OntologyList(terms=[True, False])
self.sample_source = OntologyList(terms=["primary_tissue", "2d_culture", "3d_culture", "tumor"])
self._sex = None
Expand All @@ -60,6 +66,12 @@ def __init__(self):
self.year = OntologyList(terms=list(range(2000, 3000)))

def reload_ontology(self, attr):
"""
Complex alternative to attribute-wise setters.
:param attr:
:return:
"""
kwargs = {"recache": True}
if attr == "assay_sc":
self._assay_sc = OntologyEfo(**kwargs)
Expand All @@ -69,37 +81,40 @@ def reload_ontology(self, attr):
self._cell_type = OntologyCl(branch=DEFAULT_CL, **kwargs)
elif attr == "development_stage":
self._development_stage = {
"human": OntologyHsapdv(**kwargs),
"mouse": OntologyMmusdv(**kwargs),
"homosapiens": OntologyHsapdv(**kwargs),
"musmusculus": OntologyMmusdv(**kwargs),
self.key_other: OntologyUberonLifecyclestage(branch=DEFAULT_UBERON, **kwargs),
}
elif attr == "disease":
self._disease = OntologyMondo(**kwargs)
elif attr == "ethnicity":
self._ethnicity = {
"human": OntologyHancestro(),
"mouse": None,
"homosapiens": OntologyHancestro(),
self.key_other: None,
}
elif attr == "organ":
self._organ = OntologyUberon(**kwargs)
elif attr == "organism":
self._organism = OntologyTaxon(**kwargs)
elif attr == "sex":
self._sex = OntologySex(**kwargs)
return self._assay_sc

@property
def assay_sc(self):
if self._assay_sc is None:
if self._assay_sc is None: # Lazy loading after class instantiation.
self._assay_sc = OntologyEfo()
return self._assay_sc

@property
def cell_line(self):
if self._cell_line is None:
if self._cell_line is None: # Lazy loading after class instantiation.
self._cell_line = OntologyCellosaurus()
return self._cell_line

@property
def cell_type(self):
if self._cell_type is None:
if self._cell_type is None: # Lazy loading after class instantiation.
self._cell_type = OntologyCl(branch=DEFAULT_CL)
return self._cell_type

Expand All @@ -109,36 +124,43 @@ def cell_type(self, x: str):

@property
def development_stage(self):
if self._development_stage is None:
if self._development_stage is None: # Lazy loading after class instantiation.
self._development_stage = {
"human": OntologyHsapdv(branch=DEFAULT_HSAPDV),
"mouse": OntologyMmusdv(branch=DEFAULT_MMUSDV),
"Homo sapiens": OntologyHsapdv(branch=DEFAULT_HSAPDV),
"Mus musculus": OntologyMmusdv(branch=DEFAULT_MMUSDV),
self.key_other: OntologyUberonLifecyclestage(branch=DEFAULT_UBERON),
}
return self._development_stage

@property
def disease(self):
if self._disease is None:
if self._disease is None: # Lazy loading after class instantiation.
self._disease = OntologyMondo(branch=DEFAULT_MONDO)
return self._disease

@property
def ethnicity(self):
if self._ethnicity is None:
if self._ethnicity is None: # Lazy loading after class instantiation.
self._ethnicity = {
"human": OntologyHancestro(),
"mouse": None,
"Homo sapiens": OntologyHancestro(),
self.key_other: None,
}
return self._ethnicity

@property
def organ(self):
if self._organ is None:
if self._organ is None: # Lazy loading after class instantiation.
self._organ = OntologyUberon(branch=DEFAULT_UBERON)
return self._organ

@property
def organism(self):
if self._organism is None: # Lazy loading after class instantiation.
self._organism = OntologyTaxon(branch=DEFAULT_NCBITAXON)
return self._organism

@property
def sex(self):
if self._sex is None:
if self._sex is None: # Lazy loading after class instantiation.
self._sex = OntologySex(branch=DEFAULT_PATO)
return self._sex
10 changes: 5 additions & 5 deletions sfaira/consts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import shutil
from typing import Union

from sfaira.consts.directories import CACHE_DIR, CACHE_DIR_DATABASES, CACHE_DIR_GENOMES, CACHE_DIR_ONTOLOGIES
from sfaira import settings


def clean_cache(cache: Union[None, str] = None):
Expand All @@ -13,10 +13,10 @@ def clean_cache(cache: Union[None, str] = None):
"""
if cache is not None:
cache_dir_dict = {
"all": CACHE_DIR,
"dataset_meta": CACHE_DIR_DATABASES,
"genomes": CACHE_DIR_GENOMES,
"ontologies": CACHE_DIR_ONTOLOGIES,
"all": settings.cachedir_base,
"dataset_meta": settings.cachedir_databases,
"genomes": settings.cachedir_genomes,
"ontologies": settings.cachedir_ontologies,
}
if cache not in cache_dir_dict.keys():
raise ValueError(f"Did not find cache directory input {cache} in support list: "
Expand Down
Loading

0 comments on commit 9ee40b1

Please sign in to comment.